1. Load California map and packages

##### Libraries for sptial information in map, map projection and map ploting
library(maps)
#Import California map with counties
ca.county = map("county","california", fill=TRUE, plot=FALSE)
library(mapproj)
library(spdep)
library(maptools)
library(classInt)
library(RColorBrewer)
library(ggpubr)

##### Libraries for matrix, distribution and math computation
library(mvtnorm)
library(matrixStats)
library(blockmatrix)
library(Matrix)
library(invgamma)
library(mnormt)
library(MASS)
library(Hmisc) #rcorr
library(gtools) #permutation
#library(fields)
#library(msos)
#library(boot)
#library(ggmcmc)

#library(mcmc)
#library(magic)

#library(AICcmodavg)
#library(coda)

##### Libraries for JAGS
library(rjags)
library(R2jags)

##### Draw plots
library(ggplot2)

##### Data management
library(tidyr)

##### Import and export documents
library(knitr)
library(readr)

##### Bayesian inference
library(LaplacesDemon)

##### Model selection
library(bridgesampling)

2. Dataset exploration and visualization

2.1 Import dataset (county-level) extracted from SEER

Cancers: lung, esophageal, larynx and colorectal

Outcome: Standard incidence ratios (SMR\(_{id} = Y_{id}/E_{id}\)) in the years from 2012 to 2016.

\(Y_{id}\) - observed counts of incidence for each cancer \(d\) in each county \(i\)

\(E_{id}\) - expected number of cases for each cancer \(d\) in each county \(i\)

Covariates (percentage): adult cigarette smoking rates, percentages of residents younger than 18 years old, older than 65 years old, with education level below high school, percentages of unemployed residents, black residents, male residents, uninsured residents, and percentages of families below the poverty threshold

#Import covariates
covariates <- read.csv("data/covariates.csv")
race <- read.csv("data/race.csv")
sex <- read.csv("data/sex.csv")
insurance <- read.csv("data/insurance.csv")
smoking <- read.csv("data/smoking.csv")
smoking$smoking <- as.numeric(substr(smoking$Cigarette.Smoking.Rate., 1,4))

#Import age-adjusted incidence rates for 4 cancers in California
rate_5y <- read.csv("data/age_adjusted.csv")
rate_CA = rate_5y[substr(rate_5y$State_county,1,2) == "CA",]

rate_lung = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Lung and Bronchus",]
rate_lung = rate_lung[order(readr::parse_number(as.character(rate_lung$State_county))),]
rate_lung$E_count = (sum(rate_lung$Count) / sum(rate_lung$Population)) * rate_lung$Population
rate_lung$standard_ratio = rate_lung$Count / rate_lung$E_count

rate_esophagus = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Esophagus",]
rate_esophagus = rate_esophagus[order(readr::parse_number(as.character(rate_esophagus$State_county))),]
rate_esophagus$E_count = (sum(rate_esophagus$Count) / sum(rate_esophagus$Population)) * rate_esophagus$Population
rate_esophagus$standard_ratio = rate_esophagus$Count / rate_esophagus$E_count

rate_larynx = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Larynx",]
rate_larynx = rate_larynx[order(readr::parse_number(as.character(rate_larynx$State_county))),]
rate_larynx$E_count = (sum(rate_larynx$Count) / sum(rate_larynx$Population)) * rate_larynx$Population
rate_larynx$standard_ratio = rate_larynx$Count / rate_larynx$E_count

rate_colrect = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Colon and Rectum",]
rate_colrect = rate_colrect[order(readr::parse_number(as.character(rate_colrect$State_county))),]
rate_colrect$E_count = (sum(rate_colrect$Count) / sum(rate_colrect$Population)) * rate_colrect$Population
rate_colrect$standard_ratio = rate_colrect$Count / rate_colrect$E_count

################## Data construction ###########

## Data

# Covariates in percentage
county_attribute = covariates[substr(covariates$State_county,1,2) == "CA",]
county_attribute$state = extract_numeric(county_attribute$State_county)
county_attribute1 = data.frame(county_attribute[order(county_attribute$state),])
county_attribute1$V_Persons_age_18_ACS_2012_2016 = as.numeric(county_attribute1$V_Persons_age_18_ACS_2012_2016)/100
county_attribute1$V_Persons_age_65_ACS_2012_2016 = as.numeric(county_attribute1$V_Persons_age_65_ACS_2012_2016)/100
county_attribute1$VHighschooleducationACS2012201 = as.numeric(county_attribute1$VHighschooleducationACS2012201)/100
county_attribute1$VFamiliesbelowpovertyACS201220 = as.numeric(county_attribute1$VFamiliesbelowpovertyACS201220)/100
county_attribute1$V_Unemployed_ACS_2012_2016 = as.numeric(county_attribute1$V_Unemployed_ACS_2012_2016)/100

race1 = race[substr(race$State_county,1,2) == "CA"&race$Race_recode_White_Black_Other=="Black",]
sex1 = sex[substr(sex$State_county,1,2) == "CA"&sex$Sex=="Male",]
insurance1 = insurance[substr(insurance$State_county,1,2) == "CA"&insurance$Insurance_Recode_2007=="Uninsured",]

rate_lung1 = cbind(rate_lung, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_lung1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")
rate_esophagus1 = cbind(rate_esophagus, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_esophagus1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")
rate_larynx1 = cbind(rate_larynx, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_larynx1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")
rate_colrect1 = cbind(rate_colrect, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_colrect1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")

2.2 Combine cancer data with the California map for each county

To plot the cancer data, we need to combine the data with the map polygon

#County information
county.ID <- sapply(strsplit(ca.county$names, ","), function(x) x[2])
ca.poly = map2SpatialPolygons(ca.county, IDs=county.ID)
ca.poly$rate_lung = rate_lung$standard_ratio
ca.poly$rate_esophagus = rate_esophagus$standard_ratio
ca.poly$rate_larynx = rate_larynx$standard_ratio
ca.poly$rate_colrect = rate_colrect$standard_ratio
ca.poly$smoking = smoking$smoking

ca.coords = coordinates(ca.poly)

2.3 Plots for raw incidence data and important covariates

##################### Plot SIR in areal map ##################
brks_fit_lung = quantile(ca.poly$rate_lung, c(0, 0.2, 0.4, 0.6, 0.8, 1))
brks_fit_esophagus = quantile(ca.poly$rate_esophagus, c(0, 0.2, 0.4, 0.6, 0.8, 1))
brks_fit_larynx = quantile(ca.poly$rate_larynx, c(0, 0.2, 0.4, 0.6, 0.8, 1))
brks_fit_colrect = quantile(ca.poly$rate_colrect, c(0, 0.2, 0.4, 0.6, 0.8, 1))


color.pallete = rev(brewer.pal(5,"RdBu"))
class.rate_lung = classIntervals(var=ca.poly$rate_lung, n=5, style="fixed", 
                                 fixedBreaks=brks_fit_lung, dataPrecision=4)
class.rate_esophagus = classIntervals(var=ca.poly$rate_esophagus, n=5, style="fixed", 
                                      fixedBreaks=brks_fit_esophagus, dataPrecision=4)
class.rate_larynx = classIntervals(var=ca.poly$rate_larynx, n=5, style="fixed", 
                                   fixedBreaks=brks_fit_larynx, dataPrecision=4)
class.rate_colrect = classIntervals(var=ca.poly$rate_colrect, n=5, style="fixed", 
                                    fixedBreaks=brks_fit_colrect, dataPrecision=4)
color.code.rate_lung = findColours(class.rate_lung, color.pallete)
color.code.rate_esophagus = findColours(class.rate_esophagus, color.pallete)
color.code.rate_larynx = findColours(class.rate_larynx, color.pallete)
color.code.rate_colrect = findColours(class.rate_colrect, color.pallete)


par(mfrow=c(2,2), oma = c(0,0,4,0) + 0.1, mar = c(0,0,2,0) + 0.1)

plot(ca.poly, col = color.code.rate_lung, main = "Lung cancer")

plot(ca.poly, col = color.code.rate_esophagus, main = "Esophageal cancer")

plot(ca.poly, col = color.code.rate_larynx, main = "larynx cancer")

plot(ca.poly, col = color.code.rate_colrect, main = "colorectal cancer")
leg.txt = c("0-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%")
mtext("Figure 1. Maps of standardized incidence ratios (SIR) for lung, esophageal, larynx and colocrectal cancer in California, 2012 - 2016", outer = TRUE, cex = 1)
par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
legend("center", legend=leg.txt, xpd = TRUE, cex=1.5, bty="n", horiz = FALSE, 
       fill = color.pallete)

################## Plot smoking rates in areal map ##################
brks_fit_smoking = quantile(ca.poly$smoking, c(0, 0.2, 0.4, 0.6, 0.8, 1))

class.rate_smoking = classIntervals(var=ca.poly$smoking, n=7, style="fixed", 
                                    fixedBreaks=brks_fit_smoking, dataPrecision=4)
color.code.rate_smoking = findColours(class.rate_smoking, color.pallete)

par(oma = c(0,0,4,0) + 0.1, mar = c(0,0,2,0) + 0.1)
plot(ca.poly, col = color.code.rate_smoking, main = "Figure 2. Map of adult cigarette smoking rates in California, 2014 - 2016", size = 1)
leg.txt = c("0-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%")
legend("bottomleft", legend=leg.txt, xpd = TRUE, cex=1, bty="n", horiz = FALSE, 
       fill = color.pallete)

2.4 Create adjacency matrix for spatial information in directed graph (ordered regions)

The matrix ``Adj’’ is the adjacency matrix for counties in California. We reorder the counties starting from northwest to southeast for directed graph. Then we put all the islands (counties with no parent neighbors) at the beginning. There are \(58\) counties in California.

ca.neighbors = poly2nb(ca.poly)
n=length(ca.neighbors)

# original adjacency matrix
Adj=sapply(ca.neighbors,function(x,n) {v=rep(0,n);v[x]=1;v},n)
colnames(Adj)=county.ID
ca.coord = coordinates(ca.poly)
ca.latrange=round(quantile(ca.coord[,2],c(0.25,0.75)))
ca.albersproj=mapproject(ca.coord[,1],ca.coord[,2],projection = "albers",param=ca.latrange)

# reorder counties from northwest to southeast
perm=order(ca.albersproj$x-ca.albersproj$y)
Adj_new=Adj[perm,perm]

n=nrow(Adj_new)
ni=rowSums(Adj_new)
maxn=max(ni)
neimat=matrix(0,n,maxn)
neighbors=lapply(1:n,function(x) which(Adj_new[x,]==1))
dneighbors=sapply(2:n,function(i) intersect(neighbors[[i]],1:(i-1)))
dni=sapply(dneighbors,length)
original_perm = 1:58
index2=c(1,which(dni==0)+1)

# final permutation for counties: put all the islands (counties with no parent neighbors) at the beginning
final_perm=c(original_perm[perm][index2],
             original_perm[perm][-index2])
colnames(Adj)[final_perm]
##  [1] "del norte"       "san francisco"   "humboldt"        "siskiyou"       
##  [5] "trinity"         "shasta"          "modoc"           "mendocino"      
##  [9] "tehama"          "glenn"           "lake"            "lassen"         
## [13] "colusa"          "butte"           "sonoma"          "plumas"         
## [17] "napa"            "sutter"          "yuba"            "marin"          
## [21] "yolo"            "sierra"          "nevada"          "solano"         
## [25] "placer"          "sacramento"      "contra costa"    "san mateo"      
## [29] "el dorado"       "alameda"         "amador"          "san joaquin"    
## [33] "santa cruz"      "calaveras"       "santa clara"     "alpine"         
## [37] "stanislaus"      "tuolumne"        "merced"          "mariposa"       
## [41] "san benito"      "monterey"        "mono"            "madera"         
## [45] "fresno"          "kings"           "san luis obispo" "tulare"         
## [49] "santa barbara"   "inyo"            "kern"            "ventura"        
## [53] "los angeles"     "orange"          "san bernardino"  "riverside"      
## [57] "san diego"       "imperial"
Minc = Adj[final_perm,final_perm]
n=nrow(Minc)
ni=rowSums(Minc)
maxn=max(ni)
neimat=matrix(0,n,maxn)
neighbors=lapply(1:n,function(x) which(Minc[x,]==1))
#directed or parent neighbors
dneighbors=sapply(2:n,function(i) intersect(neighbors[[i]],1:(i-1)))
dni=sapply(dneighbors,length)
nmax=max(dni)
cni=cumsum(dni)
dneimat=sapply(dneighbors, function(nei,nmax,n) c(nei,rep(n+1,nmax+1-length(nei))),nmax,n)
udnei=unlist(dneighbors)
ni_wo = sapply(neighbors,length)
cni_wo = cumsum(ni_wo)
udnei_wo = unlist(neighbors)
cn = c(0, cni)
ns = dni

region = seq(1:n)
cn = c(0, cni)
ns = dni
index = list()
for(i in 1:(n-2)){
  index[[i]] = region[-(udnei[(cn[i+1] + 1):(cn[i+1] + ns[i+1])])]
}
index1 = unlist(index)

2.5 Pairs of neighboring counties

Here are a total of 139 pairs of neighboring counties in California.

# Neighboring counties information
neighborvec0 = NULL
neighbor_list0 = NULL
neighbor_name = NULL
for(i in 1:(n-1)){
  for(j in (i+1):n){
    if(Adj[i,j] == 1){
      neighborvec0 = c(neighborvec0, paste(i, ",", j, sep=""))
      neighbor_list0 = rbind(neighbor_list0, c(i,j))
      neighbor_name = c(neighbor_name, paste(colnames(Adj)[i], ", ", colnames(Adj)[j], sep=""))
    }
  }
}
neighbor_name
##   [1] "alameda, contra costa"          "alameda, san joaquin"          
##   [3] "alameda, santa clara"           "alameda, stanislaus"           
##   [5] "alpine, amador"                 "alpine, calaveras"             
##   [7] "alpine, el dorado"              "alpine, mono"                  
##   [9] "alpine, tuolumne"               "amador, calaveras"             
##  [11] "amador, el dorado"              "amador, sacramento"            
##  [13] "amador, san joaquin"            "butte, colusa"                 
##  [15] "butte, glenn"                   "butte, plumas"                 
##  [17] "butte, sutter"                  "butte, tehama"                 
##  [19] "butte, yuba"                    "calaveras, san joaquin"        
##  [21] "calaveras, stanislaus"          "calaveras, tuolumne"           
##  [23] "colusa, glenn"                  "colusa, lake"                  
##  [25] "colusa, sutter"                 "colusa, yolo"                  
##  [27] "contra costa, sacramento"       "contra costa, san joaquin"     
##  [29] "contra costa, solano"           "del norte, humboldt"           
##  [31] "del norte, siskiyou"            "el dorado, placer"             
##  [33] "el dorado, sacramento"          "fresno, inyo"                  
##  [35] "fresno, kings"                  "fresno, madera"                
##  [37] "fresno, merced"                 "fresno, mono"                  
##  [39] "fresno, monterey"               "fresno, san benito"            
##  [41] "fresno, tulare"                 "glenn, lake"                   
##  [43] "glenn, mendocino"               "glenn, tehama"                 
##  [45] "humboldt, mendocino"            "humboldt, siskiyou"            
##  [47] "humboldt, trinity"              "imperial, riverside"           
##  [49] "imperial, san diego"            "inyo, kern"                    
##  [51] "inyo, mono"                     "inyo, san bernardino"          
##  [53] "inyo, tulare"                   "kern, kings"                   
##  [55] "kern, los angeles"              "kern, monterey"                
##  [57] "kern, san bernardino"           "kern, san luis obispo"         
##  [59] "kern, santa barbara"            "kern, tulare"                  
##  [61] "kern, ventura"                  "kings, monterey"               
##  [63] "kings, san luis obispo"         "kings, tulare"                 
##  [65] "lake, mendocino"                "lake, napa"                    
##  [67] "lake, sonoma"                   "lake, yolo"                    
##  [69] "lassen, modoc"                  "lassen, plumas"                
##  [71] "lassen, shasta"                 "lassen, sierra"                
##  [73] "los angeles, orange"            "los angeles, san bernardino"   
##  [75] "los angeles, ventura"           "madera, mariposa"              
##  [77] "madera, merced"                 "madera, mono"                  
##  [79] "madera, tuolumne"               "marin, sonoma"                 
##  [81] "mariposa, merced"               "mariposa, stanislaus"          
##  [83] "mariposa, tuolumne"             "mendocino, sonoma"             
##  [85] "mendocino, tehama"              "mendocino, trinity"            
##  [87] "merced, san benito"             "merced, santa clara"           
##  [89] "merced, stanislaus"             "merced, tuolumne"              
##  [91] "modoc, shasta"                  "modoc, siskiyou"               
##  [93] "mono, tuolumne"                 "monterey, san benito"          
##  [95] "monterey, san luis obispo"      "monterey, santa cruz"          
##  [97] "napa, solano"                   "napa, sonoma"                  
##  [99] "napa, yolo"                     "nevada, placer"                
## [101] "nevada, sierra"                 "nevada, yuba"                  
## [103] "orange, riverside"              "orange, san bernardino"        
## [105] "orange, san diego"              "placer, sacramento"            
## [107] "placer, sutter"                 "placer, yuba"                  
## [109] "plumas, shasta"                 "plumas, sierra"                
## [111] "plumas, tehama"                 "plumas, yuba"                  
## [113] "riverside, san bernardino"      "riverside, san diego"          
## [115] "sacramento, san joaquin"        "sacramento, solano"            
## [117] "sacramento, sutter"             "sacramento, yolo"              
## [119] "san benito, santa clara"        "san benito, santa cruz"        
## [121] "san francisco, san mateo"       "san joaquin, santa clara"      
## [123] "san joaquin, stanislaus"        "san luis obispo, santa barbara"
## [125] "san mateo, santa clara"         "san mateo, santa cruz"         
## [127] "santa barbara, ventura"         "santa clara, santa cruz"       
## [129] "santa clara, stanislaus"        "shasta, siskiyou"              
## [131] "shasta, tehama"                 "shasta, trinity"               
## [133] "sierra, yuba"                   "siskiyou, trinity"             
## [135] "solano, yolo"                   "stanislaus, tuolumne"          
## [137] "sutter, yolo"                   "sutter, yuba"                  
## [139] "tehama, trinity"

2.6 Exploratory analysis for spatial dependence and association across cancers

2.6.1 spatial dependence: Moran’s I

To explore the spatial association for each disease, we calculate Moran’s I based upon rth order neighbors for each cancer and plot the areal correlogram.

# distance matrix
projmat=cbind(ca.albersproj$x,ca.albersproj$y)
dmat=as.matrix(dist(projmat))
# Moran's I function
moranI <- function(y, A){
  n = length(y)
  nom_sum = 0
  den_sum = 0
  for(i in 1:n){
    den_sum = den_sum + (y[i]-mean(y))^2
    for(j in 1:n){
      nom_sum = nom_sum + A[i,j]*(y[i]-mean(y))*(y[j]-mean(y))
    }
  }
  return(n*nom_sum/sum(A)/den_sum)
}

lung_moran = c()
esophagus_moran = c()
larynx_moran = c()
colrect_moran = c()
for(lag in 1:11){
  A_1 <- as.matrix((dmat <= 0.01*lag & dmat > 0.01*(lag-1))*1)
  diag(A_1) = 0
  lung_moran[lag] <- as.numeric(moranI(rate_lung$standard_ratio, A_1))
  esophagus_moran[lag] <- as.numeric(moranI(rate_esophagus$standard_ratio, A_1))
  larynx_moran[lag] <- as.numeric(moranI(rate_larynx$standard_ratio, A_1))
  colrect_moran[lag] <- as.numeric(moranI(rate_colrect$standard_ratio, A_1))
}

moran_value = c(lung_moran, esophagus_moran, larynx_moran, colrect_moran)
cancer = c(rep("Lung", 11), rep("Esophageal", 11), rep("Larynx", 11), rep("Colorectum", 11))
df = data.frame(cancer)
df$moran_value = moran_value
df$r = rep(1:11, 4)
df$cancer = factor(df$cancer, levels = c("Lung", "Esophageal", "Larynx", "Colorectum"))

ggplot(df, aes(r, moran_value)) + geom_point() +
  ylab("Moran's I") + facet_wrap(~cancer) + theme_bw() +
  scale_x_continuous(breaks = 1:11) +
  ggtitle("Figure 3. Moran’s I of rth order neighbors for lung, esophageal, larynx and colorectum cancer") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        plot.title = element_text(size=11))

2.5.2 Association across cancers: Pearson’s correlation matrix
cancer_ratio = cbind(rate_lung$standard_ratio, rate_esophagus$standard_ratio, rate_larynx$standard_ratio, rate_colrect$standard_ratio)
colnames(cancer_ratio) = c("Lung and Bronchus", "Esophageal", "Larynx", "Colon and Rectum")
cor_matrix <- rcorr(cancer_ratio)
cor_matrix
##                   Lung and Bronchus Esophageal Larynx Colon and Rectum
## Lung and Bronchus              1.00       0.80   0.59             0.88
## Esophageal                     0.80       1.00   0.58             0.66
## Larynx                         0.59       0.58   1.00             0.50
## Colon and Rectum               0.88       0.66   0.50             1.00
## 
## n= 58 
## 
## 
## P
##                   Lung and Bronchus Esophageal Larynx Colon and Rectum
## Lung and Bronchus                    0          0      0              
## Esophageal         0                            0      0              
## Larynx             0                 0                 0              
## Colon and Rectum   0                 0          0

3. Hierarchical models for difference boundary detection of the incidence of four cancers

We apply two joint models (MDAGAR and MCAR) and two independent-disease models (DAGAR\(_{ind}\) and CAR\(_{ind}\)) for the multivariate difference boundary detection. We only include intercept for fixed effects and exclude covariates.

The two joint models are implemented using Metropolis within Gibbs Sampler in R, and two independent-disease models are implemented using rjags. For MDAGAR models, we detect difference boundaries for each cancer individually, shared difference boundaries, and cross-cancer boundaries. For MCAR model, we only show difference boundary detection for each cancer individually. For DAGAR\(_{ind}\) and CAR\(_{ind}\) models, the difference boundaries for each cancer individually and shared difference boundaries are detected to compare with joint models. We also calculate D scores to compare the model fitting for four models.

For each model, it takes up to 24 hrs to run. To save time, we include the code for DAGAR\(_{ind}\) and CAR\(_{ind}\) without running for result. You can change the setting by deleting “eval = FALSE” in corresponding code chunk.

3.1 Data in models without covariates

We run models for incidence data without covariates (only include intercept)

Y1 = rate_lung1$count[final_perm]
Y2 = rate_esophagus1$count[final_perm]
Y3 = rate_larynx1$count[final_perm]
Y4 = rate_colrect1$count[final_perm]

E1 = rate_lung1$E_count[final_perm]
E2 = rate_esophagus1$E_count[final_perm]
E3 = rate_larynx1$E_count[final_perm]
E4 = rate_colrect1$E_count[final_perm]

X1 = as.matrix(cbind(1,rate_lung1[,8:14]))[final_perm,]
X2 = as.matrix(cbind(1,rate_esophagus1[,8:14]))[final_perm,]
X3 = as.matrix(cbind(1,rate_larynx1[,8:14]))[final_perm,]
X4 = as.matrix(cbind(1,rate_colrect1[,8:14]))[final_perm,]

Y = c(Y1,Y2,Y3,Y4)
E = c(E1, E2, E3, E4)
X = as.matrix(bdiag(bdiag(X1[,1], X2[,1]), bdiag(X3[,1],X4[,1])))

3.2 Joint models

Functions for Joint models

# compute stick-breaking weights
makeprobs<-function(v){
  m<-length(v) 
  p <-matrix(0,m,m)
  for(j in 2:m){
    p[1:(j-1),j]<-1
  }
  probs<-exp(log(v)+log(1-v)%*%p)
  probs[m]<-1-sum(probs[1:(m-1)])
  probs
}

# compute u weights from F_r and prob 
makeu<- function(F_r,probs){
  
  m1=length(F_r)
  m2=length(probs)
  u=rep(0,m1)
  for (k in 1:m1){
    for (l in 1:m2){
      if (sum(probs[1:(l-1)])<F_r[k] && F_r[k]<sum(probs[1:l])){
        u[k]=l
      }
      if (u[k]==0){
        u[k]=1
      }
    }
  }
  return(u)
}

# Jacobian matrix for updating A
Jacob_A <- function(A){
  prod = 1
  for(i in 1:nrow(A)){
    prod = prod*A[i,i]^(nrow(A)-i+1)
  }
  return(2^nrow(A)*prod)
}
3.2.1 Multivariate Joint DAGAR (MDAGAR) Model: Implement Metropolis within Gibbs Sampler for MCMC updating
# Compute presicion matrices of DAGAR 
Dinv_new <- function(Rho, n, cn, ns, udnei,q){
  Tau <- list()
  B <- list()
  invD <- list()
  for(i in 1:q){
    Tau[[i]] <- diag(n)
    B[[i]] <- matrix(0, n, n)
    for(j in 3:n){
      Tau[[i]][j,j] <- (1 + (ns[j-1] - 1) * Rho[i]^2) / (1 - Rho[i]^2)
      for(k in (udnei[(cn[j-1] + 1):(cn[j-1] + ns[j-1])])){
        B[[i]][j,k] <- Rho[i] / (1 + (ns[j-1] - 1) * Rho[i]^2)
      }
    }
    invD[[i]] <- t(diag(n) - B[[i]]) %*% Tau[[i]] %*% (diag(n) - B[[i]])
  }
  return(invD)
}

#######################################
### MDAGAR model with no covariates ###
#######################################

# Metropolis within Gibbs Sampler for MCMC updating 
ARDP_joint_diff<-function(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]), 
                          X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
  #y:       data
  #x:       covariates
  #n.atoms: number of atoms in the mixture dist.
  #theta:      the theta's (iid from baseline) in the model
  #alpha:     v~beta(1,alpha)
  #u:       the index indicator of spatial random effect 
  #rho:     sptatial autocorrelation parameter in DAGAR
  #Minc:       0-1 adjacency matrix 
  #V_r:     covariance matrix of joint MDAGAR
  #Q:       presicion matrix of DAGAR
  #r:       random effects following DAGAR
  #F_r:     Marginal CDF of r
  #taued:   presicion in Baseline of DP for disease d   
  #taus:    precision for theta
  
  
  
  
  nq<-length(y)
  n <- nq / q
  p<-ncol(X)
  y1 <- y[1:n]
  y2 <- y[(n+1):(2*n)]
  y3 <- y[(2*n+1):(3*n)]
  y4 <- y[(3*n+1):(4*n)]
  
  E1 <- E[1:n]
  E2 <- E[(n+1):(2*n)]
  E3 <- E[(2*n+1):(3*n)]
  E4 <- E[(3*n+1):(4*n)]
  
  sigmasq_beta = 10000
  keepbeta=matrix(0,runs,p)
  keepphi=matrix(0,runs,nq)
  keeptheta=matrix(0,runs,n.atoms)
  keepA=matrix(0,runs,10)
  keepu=matrix(0,runs,nq)
  keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
  keepv=matrix(0,runs,n.atoms)
  keepr=matrix(0,runs,nq)
  keepFr=matrix(0,runs,nq)
  
  
  #initial values
  
  theta=rep(0,n.atoms)
  
  beta1<-rep(0,ncol(x1))
  beta2<-rep(0,ncol(x2))
  beta3<-rep(0,ncol(x3))
  beta4<-rep(0,ncol(x4))
  beta = c(beta1, beta2, beta3, beta4)
  
  taus = 1
  
  c=2
  d=0.1
  d2=1
  v<-rep(.1,n.atoms)
  probs=makeprobs(v)
  
  #A matrix
  rho=c(0.5, 0.5, 0.5, 0.5)
  eta = log(rho/(1-rho))
  A = matrix(0, q, q)
  for(i in 1:q){
    for(j in 1:i){
      if(j == i){
        A[i, j] = exp(rnorm(1))
      }else{
        A[i, j] = rnorm(1)
      }
    }
  }
  
  Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q)
  invQ1 = solve(Q[[1]])
  invQ2 = solve(Q[[2]])
  invQ3 = solve(Q[[3]])
  invQ4 = solve(Q[[4]])
  
  kprod = kronecker(A, diag(n))
  invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
  
  Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
  r = rmvnorm(1, rep(0, nq), Vr)
  
  
  F_r=pnorm(r,0,sqrt(diag(Vr)))
  u=makeu(F_r,probs)
  phi <- theta[u]
  
  nu = 2
  R = 0.1 * diag(q)
  
  
  acceptr=acceptv=acceptrho=acceptA=0
  acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
  accepttheta = 0
  
  count<-afterburn<-0
  burn = burn + 1

  for(iter in 1:runs){
    
    if(iter == runs){
      print(iter)
      print(acceptA/(iter-1)) 
      print(acceptr/nq/(iter-1))
      print(acceptrho/(iter-1)) 
      print(acceptv/n.atoms/(iter-1))
      print(accepttheta/n.atoms/(iter-1))
      print(acceptbeta1/(iter-1))
      print(acceptbeta2/(iter-1))
      print(acceptbeta3/(iter-1))
      print(acceptbeta4/(iter-1))
    }
    ######################
    ###   update beta  ###
    ###################### 
    # update beta (intercept only model)
    
    pro_beta1=rnorm(1,beta1,0.02)
    MHrate1=sum(-E1*exp(pro_beta1*x1+theta[u][1:n])+y1*(pro_beta1*x1+theta[u][1:n]))-
      sum(-E1*exp(beta1*x1+theta[u][1:n])+y1*(beta1*x1+theta[u][1:n]))  
    
    if(runif(1,0,1)<exp(MHrate1)){
      beta1<-pro_beta1
      acceptbeta1=acceptbeta1+1
    } 
    
    pro_beta2=rnorm(1,beta2,0.05)
    MHrate2=sum(-E2*exp(pro_beta2*x2+theta[u][(n+1):(2*n)])+y2*(pro_beta2*x2+theta[u][(n+1):(2*n)]))-
      sum(-E2*exp(beta2*x2+theta[u][(n+1):(2*n)])+y2*(beta2*x2+theta[u][(n+1):(2*n)]))  
    
    if(runif(1,0,1)<exp(MHrate2)){
      beta2<-pro_beta2
      acceptbeta2=acceptbeta2+1
    } 
    
    pro_beta3=rnorm(1,beta3,0.05)
    MHrate3=sum(-E3*exp(pro_beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3*x3+theta[u][(2*n+1):(3*n)]))-
      sum(-E3*exp(beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(beta3*x3+theta[u][(2*n+1):(3*n)]))  
    
    if(runif(1,0,1)<exp(MHrate3)){
      beta3<-pro_beta3
      acceptbeta3=acceptbeta3+1
    } 
    
    pro_beta4=rnorm(1,beta4,0.02)
    MHrate4=sum(-E4*exp(pro_beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4*x4+theta[u][(3*n+1):(4*n)]))-
      sum(-E4*exp(beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(beta4*x4+theta[u][(3*n+1):(4*n)]))  
    
    if(runif(1,0,1)<exp(MHrate4)){
      beta4<-pro_beta4
      acceptbeta4=acceptbeta4+1
    } 
    
    beta <- c(beta1, beta2, beta3, beta4)
    
    
    #########################
    ###   update theta    ###
    ######################### 
    
    u1 = u[1:n]
    u2 = u[(n+1):(2*n)]
    u3 = u[(2*n+1):(3*n)]
    u4 = u[(3*n+1):(4*n)]
    
    
    
    for (j in 1:n.atoms){
      
      count[j]=sum(u==j)
      
      if (count[j]==0){
        theta[j]=rnorm(1,0,sqrt(1/taus))
      }else{
        pro_theta=rnorm(1,theta[j],0.06)
        
        MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
          sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
        if(runif(1,0,1)<exp(MHrate)){
          theta[j]<-pro_theta
          accepttheta=accepttheta+1
        } 
      }
    }
    
    
    
    ######################
    ###   update r     ###
    ###################### 
  
    
    for (k in 1:nq){
      pro_r=r;pro_Fr=F_r;pro_u=u
      pro_r[k]=rnorm(1,r[k],1.8)
      pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))         
      pro_u[k]=makeu(pro_Fr[k],probs)
      
      MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
        dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T)
          if(runif(1,0,1)<exp(MH)){
        r[k]=pro_r[k]
        F_r[k]=pro_Fr[k]
        u[k]=pro_u[k]
        acceptr=acceptr+1
      }
    }
    
    
    ######################
    ###   update   v   ###
    ###################### 
    
    for (j in 1:n.atoms){
      pro_v=v
      pro_v[j]<-rnorm(1,v[j],0.06)
      while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.06)}
    
      pro_probs=makeprobs(pro_v)
      pro_u=makeu(F_r,pro_probs)
      
      MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
        log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
      
      
      if(runif(1,0,1)<exp(MH)){
        v[j]=pro_v[j]
        probs=pro_probs
        u=pro_u
        acceptv=acceptv+1   
      }
    }
    
    
    ######################
    ###   update taus  ###
    ###################### 
    
    
    taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
    
    
    ######################
    ###   update rho  ###
    ###################### 
    
    
    pro_eta1 = rnorm(1, eta[1], 0.4)
    pro_eta2 = rnorm(1, eta[2], 0.4)
    pro_eta3 = rnorm(1, eta[3], 0.4)
    pro_eta4 = rnorm(1, eta[4], 0.4)
    pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
    pro_rho = exp(pro_eta)/(1+exp(pro_eta))
    pro_Q = Dinv_new(Rho = pro_rho, n, cn, ns, udnei, q)
    pro_invQ1 = solve(pro_Q[[1]])
    pro_invQ2 = solve(pro_Q[[2]])
    pro_invQ3 = solve(pro_Q[[3]])
    pro_invQ4 = solve(pro_Q[[4]])
    
    kprod = kronecker(A, diag(n))
    pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
    pro_Vr = kprod %*% pro_invQ %*% t(kprod)
    
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + 
      log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
      log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
      log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
    
    if(runif(1,0,1)<exp(MH)){
      eta = pro_eta
      rho = pro_rho
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptrho=acceptrho+1 
    }
    
    ######################
    ###   update A  ###
    ###################### 

    pro_A = matrix(0, q, q)
    for(i in 1:q){
      for(j in 1:i){
        if(j == i){
          pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.04))
        }else{
          pro_A[i, j] = rnorm(1, A[i, j], 0.03)
        }
      }
    }
    
    Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q=4)
    invQ1 = solve(Q[[1]])
    invQ2 = solve(Q[[2]])
    invQ3 = solve(Q[[3]])
    invQ4 = solve(Q[[4]])
    Sigma =  A %*% t(A)
    
    pro_kprod = kronecker(pro_A, diag(n))
    invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
    pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
    pro_Sigma =  pro_A %*% t(pro_A)
    
    lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
    pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
    
    if(runif(1,0,1)<exp(MH)){
      A = pro_A
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptA=acceptA+1 
    }
    

    ######################
    ### record samples ###
    ###################### 
    
    keeptheta[iter,] = theta
    keepphi[iter,] = theta[u]
    
    keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
    
    keepbeta[iter,]=beta
    keeptaus[iter]=taus
    keeprho1[iter]=rho[1]
    keeprho2[iter]=rho[2]
    keeprho3[iter]=rho[3]
    keeprho4[iter]=rho[4]
    keepu[iter,]=u
    keepv[iter,]=v
    keepr[iter,]=r
    keepFr[iter,]=F_r
    
    #   cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
    
  }
  
  list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
       v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
       rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}

set.seed(123)
mcmc_samples=ARDP_joint_diff(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]), 
                             X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.2543418
## [1] 0.1761684
## [1] 0.2381413
## [1] 0.1907908
## [1] 0.2749669
## [1] 0.209007
## [1] 0.2755759
## [1] 0.351045
## [1] 0.2270742
#Estimates of parameters
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
I. Obtain spatial randome effects in original spatial order
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
II. Difference boundaries for each cancer individually

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)

vij_samples1 <- t(apply(phis1, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples2 <- t(apply(phis2, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples3 <- t(apply(phis3, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples4 <- t(apply(phis4, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)

names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name

a. Top \(75\) pairs of neighboring counties with significant boundary effect

name_diff1 <- names(pvij1[order(pvij1,decreasing = T)][1:75])
name_diff2 <- names(pvij2[order(pvij2,decreasing = T)][1:75])
name_diff3 <- names(pvij3[order(pvij3,decreasing = T)][1:75])
name_diff4 <- names(pvij4[order(pvij4,decreasing = T)][1:75])

# Table for top 75 pairs of neighbors
name_diff = cbind(name_diff1, name_diff2, name_diff3, name_diff4)
colnames(name_diff) = c("Lung", "Esophageal", "Layrnx", "Coloretal")
knitr::kable(name_diff, caption = "Names of adjacent counties that have significant boundary effect (top 75) from the MDAGAR model for each cancer individually.")
Names of adjacent counties that have significant boundary effect (top 75) from the MDAGAR model for each cancer individually.
Lung Esophageal Layrnx Coloretal
alameda, contra costa butte, sutter merced, tuolumne alameda, contra costa
alameda, san joaquin calaveras, san joaquin mariposa, merced amador, san joaquin
alameda, santa clara calaveras, stanislaus napa, yolo butte, sutter
amador, el dorado el dorado, sacramento lake, yolo calaveras, san joaquin
amador, sacramento fresno, inyo san joaquin, santa clara contra costa, san joaquin
amador, san joaquin inyo, kern san mateo, santa clara fresno, inyo
butte, colusa inyo, san bernardino inyo, kern inyo, kern
butte, sutter inyo, tulare madera, tuolumne inyo, tulare
calaveras, san joaquin kern, san luis obispo sacramento, yolo kern, los angeles
calaveras, stanislaus kings, san luis obispo fresno, inyo kern, san bernardino
colusa, glenn lake, mendocino calaveras, san joaquin kern, san luis obispo
colusa, lake lake, napa inyo, san bernardino kern, santa barbara
contra costa, sacramento lake, yolo calaveras, stanislaus kern, ventura
contra costa, solano los angeles, ventura inyo, tulare kings, san luis obispo
el dorado, sacramento madera, mariposa santa clara, stanislaus lake, yolo
fresno, inyo madera, tuolumne orange, san diego madera, mariposa
fresno, madera mariposa, merced madera, mariposa mariposa, merced
fresno, tulare merced, tuolumne orange, riverside merced, tuolumne
glenn, lake mono, tuolumne contra costa, sacramento monterey, san luis obispo
imperial, riverside monterey, san luis obispo napa, solano napa, yolo
imperial, san diego napa, yolo lassen, plumas san luis obispo, santa barbara
inyo, kern orange, riverside kern, san luis obispo solano, yolo
inyo, mono orange, san diego lassen, shasta sacramento, yolo
inyo, san bernardino san joaquin, santa clara merced, stanislaus inyo, san bernardino
inyo, tulare san mateo, santa clara el dorado, sacramento lassen, shasta
kern, san luis obispo stanislaus, tuolumne colusa, glenn santa clara, stanislaus
kern, santa barbara napa, solano butte, colusa monterey, santa cruz
kern, ventura merced, stanislaus santa clara, santa cruz san mateo, santa clara
kings, san luis obispo kern, santa barbara lassen, modoc calaveras, stanislaus
lake, mendocino lassen, shasta kings, san luis obispo napa, solano
lake, napa santa clara, stanislaus stanislaus, tuolumne butte, yuba
lake, sonoma kern, ventura mono, tuolumne colusa, lake
lake, yolo lassen, plumas mariposa, stanislaus orange, riverside
lassen, plumas butte, colusa sutter, yolo colusa, glenn
lassen, shasta sutter, yolo lake, sonoma madera, tuolumne
los angeles, orange colusa, glenn humboldt, siskiyou sacramento, san joaquin
los angeles, ventura solano, yolo colusa, lake san joaquin, santa clara
madera, mariposa placer, sacramento inyo, mono alameda, santa clara
madera, tuolumne lake, sonoma amador, san joaquin inyo, mono
mariposa, merced mariposa, stanislaus butte, sutter mariposa, stanislaus
mariposa, stanislaus los angeles, orange plumas, sierra san francisco, san mateo
merced, stanislaus sacramento, yolo riverside, san bernardino el dorado, sacramento
merced, tuolumne colusa, lake solano, yolo merced, stanislaus
mono, tuolumne inyo, mono sacramento, san joaquin amador, sacramento
monterey, san luis obispo lassen, modoc alpine, mono humboldt, mendocino
napa, solano mendocino, trinity madera, merced butte, colusa
napa, yolo amador, san joaquin fresno, madera placer, sutter
orange, riverside alameda, santa clara nevada, sierra colusa, yolo
orange, san bernardino madera, merced san benito, santa cruz lassen, plumas
orange, san diego santa clara, santa cruz fresno, mono lassen, modoc
riverside, san bernardino alpine, tuolumne lake, napa plumas, yuba
sacramento, san joaquin mendocino, tehama alameda, stanislaus monterey, san benito
sacramento, yolo san luis obispo, santa barbara alameda, san joaquin riverside, san diego
san francisco, san mateo nevada, sierra alpine, amador los angeles, orange
san joaquin, santa clara orange, san bernardino modoc, siskiyou orange, san bernardino
san luis obispo, santa barbara glenn, lake placer, sutter orange, san diego
san mateo, santa clara fresno, madera alpine, tuolumne sutter, yolo
santa clara, stanislaus fresno, mono kings, monterey humboldt, trinity
solano, yolo alpine, amador san luis obispo, santa barbara alpine, mono
stanislaus, tuolumne humboldt, mendocino lake, mendocino mono, tuolumne
sutter, yolo placer, sutter placer, sacramento fresno, mono
amador, calaveras kern, monterey alpine, calaveras plumas, sierra
alpine, amador sierra, yuba mendocino, trinity lake, sonoma
humboldt, trinity alameda, contra costa sierra, yuba nevada, yuba
mendocino, trinity kings, monterey los angeles, ventura madera, merced
butte, yuba plumas, sierra orange, san bernardino alpine, el dorado
madera, merced sutter, yuba mendocino, tehama mariposa, tuolumne
fresno, kings alameda, san joaquin sutter, yuba humboldt, siskiyou
lassen, modoc los angeles, san bernardino alpine, el dorado stanislaus, tuolumne
mendocino, sonoma nevada, placer colusa, yolo del norte, humboldt
placer, sacramento del norte, siskiyou humboldt, trinity amador, el dorado
alameda, stanislaus alpine, calaveras kern, monterey alpine, amador
san mateo, santa cruz del norte, humboldt del norte, siskiyou alpine, tuolumne
fresno, monterey colusa, yolo monterey, san luis obispo nevada, sierra
mendocino, tehama amador, el dorado monterey, san benito placer, sacramento

Number \(1-61\) for lung cancer, \(1-26\) for esophageal cancer and \(1-22\) for colorectal cancer are ranked by initial letters with \(P(\phi_{id} \neq \phi_{jd} | \bm{y}) = 1\).

b. Estimated FDR curves

T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  th1 <- threshold1[i]
  est_diff1 <- as.numeric(pvij1 >= th1)
  FDR_est1[i] <- sum((1-pvij1) * est_diff1)  / sum(est_diff1)
  
  th2 <- threshold2[i]
  est_diff2 <- as.numeric(pvij2 >= th2)
  FDR_est2[i] <- sum((1-pvij2) * est_diff2)  / sum(est_diff2)
  
  th3 <- threshold3[i]
  est_diff3 <- as.numeric(pvij3 >= th3)
  FDR_est3[i] <- sum((1-pvij3) * est_diff3)  / sum(est_diff3)
  
  th4 <- threshold4[i]
  est_diff4 <- as.numeric(pvij4 >= th4)
  FDR_est4[i] <- sum((1-pvij4) * est_diff4)  / sum(est_diff4)
}

plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
       lty=1:4, cex=0.7)

c. Difference boundary detection

By setting threshold for FDR as \(0.025\), we get difference boundaries detected for each cancer individually.

# Libraries for map with boundaires
if (!require(gpclib)) install.packages("gpclib", type="source")
gpclibPermit()
## [1] TRUE
library(plyr)
ca.poly@data$id <- rownames(ca.poly@data)
ca.poly.f <- fortify(ca.poly, region = "id")
ca.poly.df <- join(ca.poly.f, ca.poly@data, by = "id")

# Correct boundary data for the map
path=list()
for(i in 1: nrow(neighbor_list0)){
  r1 <- ca.poly.df[ca.poly.df$id %in% neighbor_list0[i,1],1:2]
  r2 <- ca.poly.df[ca.poly.df$id %in% neighbor_list0[i,2],1:2]
  edges <- generics::intersect(r1, r2)
  path[[i]] = edges
}
path[[2]][nrow(path[[2]])+1,] <- path[[2]][1,]
path[[2]] <- path[[2]][-1,]
path[[11]][nrow(path[[11]])+1,] <- path[[11]][1,]
path[[11]] <- path[[11]][-1,]
path[[19]][nrow(path[[19]])+1,] <- path[[19]][1,]
path[[19]] <- path[[19]][-1,]
path[[25]][nrow(path[[25]])+1,] <- path[[25]][1,]
path[[25]] <- path[[25]][-1,]
path[[27]][nrow(path[[27]])+1,] <- path[[27]][1,]
path[[27]] <- path[[27]][-1,]
path[[31]][nrow(path[[31]])+1,] <- path[[31]][1,]
path[[31]] <- path[[31]][-1,]
path[[39]][nrow(path[[39]])+1,] <- path[[39]][1,]
path[[39]] <- path[[39]][-1,]
path[[43]][nrow(path[[43]])+1,] <- path[[43]][1,]
path[[43]] <- path[[43]][-1,]
path[[46]][nrow(path[[46]])+1,] <- path[[46]][1,]
path[[46]] <- path[[46]][-1,]
path[[64]][nrow(path[[64]])+1,] <- path[[64]][1,]
path[[64]] <- path[[64]][-1,]
path[[75]][nrow(path[[75]])+1,] <- path[[75]][1,]
path[[75]] <- path[[75]][-1,]
path[[76]][nrow(path[[76]])+1,] <- path[[76]][1,]
path[[76]] <- path[[76]][-1,]
path[[81]][nrow(path[[81]])+1,] <- path[[81]][1,]
path[[81]] <- path[[81]][-1,]
path[[100]][nrow(path[[100]])+1,] <- path[[100]][1,]
path[[100]] <- path[[100]][-1,]
path[[125]][nrow(path[[125]])+1,] <- path[[125]][1,]
path[[125]] <- path[[125]][-1,]
path[[130]][nrow(path[[130]])+1,] <- path[[130]][1,]
path[[130]] <- path[[130]][-1,]
path[[133]][nrow(path[[133]])+1,] <- path[[133]][1,]
path[[133]] <- path[[133]][-1,]
# Set threshold for FDR: 0.025
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)

# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])

est_diff2 <- as.numeric(pvij2 >= threshold2[T2])

est_diff3 <- as.numeric(pvij3 >= threshold3[T3])

est_diff4 <- as.numeric(pvij4 >= threshold4[T4])

# Lung cancer
edge_plot1 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}

for(i in which(est_diff1_1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}

# Esophageal cancer
edge_plot2 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff2 == 1)){
  edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}

# Larynx cancer
edge_plot3 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff3 == 1)){
  edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}

# Colorectal cancer
edge_plot4 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff4 == 1)){
  edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}

text <- "Figure 2. Difference boundaries (highlighted in red and blue) detected by MDAGAR in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))

ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5)) 

III. Shared difference boundary for each pair of cancers

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}, \phi_{id'} \neq \phi_{jd'}),\quad i \sim j,\quad d \neq d'\)

vij_samplesc12 <- t(apply(cbind(phis1,phis2), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc13 <- t(apply(cbind(phis1,phis3), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc14 <- t(apply(cbind(phis1,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc23 <- t(apply(cbind(phis2,phis3), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc24 <- t(apply(cbind(phis2,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc34 <- t(apply(cbind(phis3,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

# probabilities: P(phi_id != phi_jd, phi_id' != phi_jd'), i ~ j, d != d'
pvijc12 <- apply(vij_samplesc12, 2, mean)
pvijc13 <- apply(vij_samplesc13, 2, mean)
pvijc14 <- apply(vij_samplesc14, 2, mean)
pvijc23 <- apply(vij_samplesc23, 2, mean)
pvijc24 <- apply(vij_samplesc24, 2, mean)
pvijc34 <- apply(vij_samplesc34, 2, mean)

# Estimated FDR
thresholdc12 = sort(pvijc12, decreasing = TRUE)[T_edge1]
thresholdc13 = sort(pvijc13, decreasing = TRUE)[T_edge1]
thresholdc14 = sort(pvijc14, decreasing = TRUE)[T_edge1]
thresholdc23 = sort(pvijc23, decreasing = TRUE)[T_edge1]
thresholdc24 = sort(pvijc24, decreasing = TRUE)[T_edge1]
thresholdc34 = sort(pvijc34, decreasing = TRUE)[T_edge1]

FDR_estc12 <- rep(0, length(T_edge1))
FDR_estc13 <- rep(0, length(T_edge1))
FDR_estc14 <- rep(0, length(T_edge1))
FDR_estc23 <- rep(0, length(T_edge1))
FDR_estc24 <- rep(0, length(T_edge1))
FDR_estc34 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  thc12 <- thresholdc12[i]
  est_diffc12 <- as.numeric(pvijc12 >= thc12)
  FDR_estc12[i] <- sum((1-pvijc12) * est_diffc12)  / sum(est_diffc12)
  
  thc13 <- thresholdc13[i]
  est_diffc13 <- as.numeric(pvijc13 >= thc13)
  FDR_estc13[i] <- sum((1-pvijc13) * est_diffc13)  / sum(est_diffc13)
  
  thc14 <- thresholdc14[i]
  est_diffc14 <- as.numeric(pvijc14 >= thc14)
  FDR_estc14[i] <- sum((1-pvijc14) * est_diffc14)  / sum(est_diffc14)
  
  thc23 <- thresholdc23[i]
  est_diffc23 <- as.numeric(pvijc23 >= thc23)
  FDR_estc23[i] <- sum((1-pvijc23) * est_diffc23)  / sum(est_diffc23)
  
  thc24 <- thresholdc24[i]
  est_diffc24 <- as.numeric(pvijc24 >= thc24)
  FDR_estc24[i] <- sum((1-pvijc24) * est_diffc24)  / sum(est_diffc24)
  
  thc34 <- thresholdc34[i]
  est_diffc34 <- as.numeric(pvijc34 >= thc34)
  FDR_estc34[i] <- sum((1-pvijc34) * est_diffc34)  / sum(est_diffc34)
}

Use the same threshold as above and identify shared boundaries

alpha_nc = 0.025
T12c = sum(FDR_estc12<=alpha_nc)
T13c = sum(FDR_estc13<=alpha_nc)
T14c = sum(FDR_estc14<=alpha_nc)
T23c = sum(FDR_estc23<=alpha_nc)
T24c = sum(FDR_estc24<=alpha_nc)
T34c = sum(FDR_estc34<=alpha_nc)


est_diffc12 <- as.numeric(pvijc12 >= thresholdc12[T12c])
neighbor_list_diffc12 <- neighbor_list0[est_diffc12 == 1, ]

est_diffc13 <- as.numeric(pvijc13 >= thresholdc13[T13c])
neighbor_list_diffc13 <- neighbor_list0[est_diffc13 == 1, ]

est_diffc14 <- as.numeric(pvijc14 >= thresholdc14[T14c])
neighbor_list_diffc14 <- neighbor_list0[est_diffc14 == 1, ]

est_diffc23 <- as.numeric(pvijc23 >= thresholdc23[T23c])
neighbor_list_diffc23 <- neighbor_list0[est_diffc23 == 1, ]

est_diffc24 <- as.numeric(pvijc24 >= thresholdc24[T24c])
neighbor_list_diffc24 <- neighbor_list0[est_diffc24 == 1, ]

est_diffc34 <- as.numeric(pvijc34 >= thresholdc34[T34c])
neighbor_list_diffc34 <- neighbor_list0[est_diffc34 == 1, ]

edge_plotc12 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc12==1)){
  edge_plotc12 = edge_plotc12 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Esophageal (T = ", T12c, ")", sep=""))
}


edge_plotc13 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc13==1)){
  edge_plotc13 = edge_plotc13 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Layrnx (T = ", T13c, ")", sep=""))
}


edge_plotc14 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc14==1)){
  edge_plotc14 = edge_plotc14 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Colorectal (T = ", T14c, ")", sep=""))
}

edge_plotc23 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc23==1)){
  edge_plotc23 = edge_plotc23 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal, Layrnx (T = ", T23c, ")", sep=""))
}


edge_plotc24 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc24==1)){
  edge_plotc24 = edge_plotc24 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal, Colorectal (T = ", T24c, ")", sep=""))
}


edge_plotc34 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc34==1)){
  edge_plotc34 = edge_plotc34 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx, Colorectal (T = ", T34c, ")", sep=""))
}

text <- "Figure 3. Shared difference boundaries (highlighted in red) detected by MDAGAR for each pair of cancer in SIR map. \n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 24)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,0,0,32, "cm"))
ggarrange(plot_0, NULL, NULL, edge_plotc12, edge_plotc13, edge_plotc14, 
          edge_plotc23, edge_plotc24, edge_plotc34, nrow = 3, ncol = 3, heights = c(1,5,5))

IV. Mutual cross-disease difference boundary

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd'}, \phi_{id'} \neq \phi_{jd}),\quad i \sim j, \quad i < j,\quad d \neq d'\)

vij_samples12 <- t(apply(cbind(phis1,phis2), 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
       x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples13 <- t(apply(cbind(phis1,phis3), 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
       x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples14 <- t(apply(cbind(phis1,phis4), 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
       x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))


vij_samples23 <- t(apply(cbind(phis2,phis3), 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
       x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples24 <- t(apply(cbind(phis2,phis4), 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
       x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples34 <- t(apply(cbind(phis3,phis4), 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
       x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

# probabilities: P(phi_id != phi_jd', phi_id' != phi_jd), i ~ j, i < j, d != d'
pvij12 <- apply(vij_samples12, 2, mean)
pvij13 <- apply(vij_samples13, 2, mean)
pvij14 <- apply(vij_samples14, 2, mean)
pvij23 <- apply(vij_samples23, 2, mean)
pvij24 <- apply(vij_samples24, 2, mean)
pvij34 <- apply(vij_samples34, 2, mean)

# Estimated FDR
threshold12 = sort(pvij12, decreasing = TRUE)[T_edge1]
threshold13 = sort(pvij13, decreasing = TRUE)[T_edge1]
threshold14 = sort(pvij14, decreasing = TRUE)[T_edge1]
threshold23 = sort(pvij23, decreasing = TRUE)[T_edge1]
threshold24 = sort(pvij24, decreasing = TRUE)[T_edge1]
threshold34 = sort(pvij34, decreasing = TRUE)[T_edge1]

FDR_est12 <- rep(0, length(T_edge1))
FDR_est13 <- rep(0, length(T_edge1))
FDR_est14 <- rep(0, length(T_edge1))
FDR_est23 <- rep(0, length(T_edge1))
FDR_est24 <- rep(0, length(T_edge1))
FDR_est34 <- rep(0, length(T_edge1))

for(i in 1:length(threshold1)){
  th12 <- threshold12[i]
  est_diff12<- as.numeric(pvij12 >= th12)
  FDR_est12[i] <- sum((1-pvij12) * est_diff12)  / sum(est_diff12)
  
  th13 <- threshold13[i]
  est_diff13 <- as.numeric(pvij13 >= th13)
  FDR_est13[i] <- sum((1-pvij13) * est_diff13)  / sum(est_diff13)
  
  th14 <- threshold14[i]
  est_diff14 <- as.numeric(pvij14 >= th14)
  FDR_est14[i] <- sum((1-pvij14) * est_diff14)  / sum(est_diff14)
  
  th23 <- threshold23[i]
  est_diff23 <- as.numeric(pvij23 >= th23)
  FDR_est23[i] <- sum((1-pvij23) * est_diff23)  / sum(est_diff23)
  
  th24 <- threshold24[i]
  est_diff24 <- as.numeric(pvij24 >= th24)
  FDR_est24[i] <- sum((1-pvij24) * est_diff24)  / sum(est_diff24)
  
  th34 <- threshold34[i]
  est_diff34 <- as.numeric(pvij34 >= th34)
  FDR_est34[i] <- sum((1-pvij34) * est_diff34)  / sum(est_diff34)
}

Use the same threshold as above and identify cross-disease boundaries

alpha_n1 = 0.025
T12=sum(FDR_est12<=alpha_n1)
T13=sum(FDR_est13<=alpha_n1)
T14=sum(FDR_est14<=alpha_n1)
T23=sum(FDR_est23<=alpha_n1)
T24=sum(FDR_est24<=alpha_n1)
T34=sum(FDR_est34<=alpha_n1)

est_diff12 <- as.numeric(pvij12 >= threshold12[T12])
est_diff13 <- as.numeric(pvij13 >= threshold13[T13])
est_diff14 <- as.numeric(pvij14 >= threshold23[T14])
est_diff23 <- as.numeric(pvij23 >= threshold23[T23])
est_diff24 <- as.numeric(pvij24 >= threshold24[T24])
est_diff34 <- as.numeric(pvij34 >= threshold34[T34])

edge_plot12 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff12==1)){
  edge_plot12 = edge_plot12 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung vs. Esophageal (T = ", T12, ")", sep=""))
}

edge_plot13 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff13==1)){
  edge_plot13 = edge_plot13 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung vs. Layrnx (T = ", T13, ")", sep=""))
}

edge_plot14 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff14==1)){
  edge_plot14 = edge_plot14 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung vs. Colorectal (T = ", T14, ")", sep=""))
}

edge_plot23 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff23==1)){
  edge_plot23 = edge_plot23 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal vs. Layrnx (T = ", T23, ")", sep=""))
}

edge_plot24 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff24==1)){
  edge_plot24 = edge_plot24 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal vs. Colorectal (T = ", T24, ")", sep=""))
}

edge_plot34 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff34==1)){
  edge_plot34 = edge_plot34 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx vs. Colorectal (T = ", T34, ")", sep=""))
}

text <- "Figure 4. Mutual cross-cancer difference boundaries (highlighted in red) detected by MDAGAR for each pair of cancer in SIR map. \n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 23)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,0,0,33, "cm"))
ggarrange(plot_0, NULL, NULL, edge_plot12, edge_plot13, edge_plot14, 
          edge_plot23, edge_plot24, edge_plot34, nrow = 3, ncol = 3, heights = c(1, 5, 5))

V. Model fitting: D score
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
  
  beta1 <- mcmc_samples$beta[i,1]
  beta2 <- mcmc_samples$beta[i,2]
  beta3 <- mcmc_samples$beta[i,3]
  beta4 <- mcmc_samples$beta[i,4]
  
  W1_mcmc = mcmc_samples$phi[i,1:58]
  W2_mcmc = mcmc_samples$phi[i,59:116]
  W3_mcmc = mcmc_samples$phi[i,117:174]
  W4_mcmc = mcmc_samples$phi[i,175:232]
  
  Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 +  W1_mcmc)) / E1
  Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 +  W2_mcmc)) / E2
  Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 +  W3_mcmc)) / E3
  Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 +  W4_mcmc)) / E4
  
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)

var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)

G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1

G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2

G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3

G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4

D_DAGAR = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_DAGAR
## [1]  2.503756 26.957530 45.340546  2.328468
sum(D_DAGAR)
## [1] 77.1303
3.2.2 Multivariate CAR (MCAR) Model: Implement Metropolis within Gibbs Sampler for MCMC updating
# Matrix for precision matrix of CAR
D=matrix(0,n,n)
for(i in 1:n) D[i,i]=ni[i]

#######################################
### MCAR model with no covariates ###
#######################################

# Metropolis within Gibbs Sampler for MCMC updating 

ARDP_joint_diff_car<-function(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]), 
                              X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
  #y:       data
  #x:       covariates
  #n.atoms: number of atoms in the mixture dist.
  #theta:      the theta's (iid from baseline) in the model
  #alpha:     v~beta(1,alpha)
  #u:       the index indicator of spatial random effect 
  #rho:     sptatial autocorrelation parameter in DAGAR
  #Minc:       0-1 adjacency matrix 
  #V_r:     covariance matrix of joint MDAGAR
  #Q:       presicion matrix of DAGAR
  #r:       random effects following DAGAR
  #F_r:     Marginal CDF of r
  #taued:   presicion in Baseline of DP for disease d   
  #taus:    precision for theta
  
  
  
  
  nq<-length(y)
  n <- nq / q
  p<-ncol(X)
  y1 <- y[1:n]
  y2 <- y[(n+1):(2*n)]
  y3 <- y[(2*n+1):(3*n)]
  y4 <- y[(3*n+1):(4*n)]
  
  E1 <- E[1:n]
  E2 <- E[(n+1):(2*n)]
  E3 <- E[(2*n+1):(3*n)]
  E4 <- E[(3*n+1):(4*n)]
  
  sigmasq_beta = 10000
  keepbeta=matrix(0,runs,p)
  keepphi=matrix(0,runs,nq)
  keeptheta=matrix(0,runs,n.atoms)
  keepA=matrix(0,runs,10)
  keepu=matrix(0,runs,nq)
  keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
  keepv=matrix(0,runs,n.atoms)
  keepr=matrix(0,runs,nq)
  keepFr=matrix(0,runs,nq)
  
  
  #initial values
  
  theta=rep(0,n.atoms)
  
  beta1<-rep(0,ncol(x1))
  beta2<-rep(0,ncol(x2))
  beta3<-rep(0,ncol(x3))
  beta4<-rep(0,ncol(x4))
  beta = c(beta1, beta2, beta3, beta4)
  
  taus = 1
  
  c=2
  d=0.1
  d2=1
  v<-rep(.1,n.atoms)
  probs=makeprobs(v)
  
  #A matrix
  rho=c(0.5, 0.5, 0.5, 0.5)
  eta = log(rho/(1-rho))
  A = matrix(0, q, q)
  for(i in 1:q){
    for(j in 1:i){
      if(j == i){
        A[i, j] = exp(rnorm(1))
      }else{
        A[i, j] = rnorm(1)
      }
    }
  }
  
  invQ1 = solve(D - rho[1] * Minc)
  invQ2 = solve(D - rho[2] * Minc)
  invQ3 = solve(D - rho[3] * Minc)
  invQ4 = solve(D - rho[4] * Minc)
  
  kprod = kronecker(A, diag(n))
  invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
  
  Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
  r = rmvnorm(1, rep(0, nq), Vr)
  
  
  F_r=pnorm(r,0,sqrt(diag(Vr)))
  u=makeu(F_r,probs)
  phi <- theta[u]
  
  nu = 2
  R = 0.1 * diag(q)
  
  
  acceptr=acceptv=acceptrho=acceptA=0
  acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
  accepttheta = 0
  
  count<-afterburn<-0
  burn = burn + 1
  
  for(iter in 1:runs){
    
    if(iter == runs){
      print(iter)
      print(acceptA/(iter-1)) 
      print(acceptr/nq/(iter-1))
      print(acceptrho/(iter-1)) 
      print(acceptv/n.atoms/(iter-1))
      print(accepttheta/n.atoms/(iter-1))
      print(acceptbeta1/(iter-1))
      print(acceptbeta2/(iter-1))
      print(acceptbeta3/(iter-1))
      print(acceptbeta4/(iter-1))
    }
    ######################
    ###   update beta  ###
    ###################### 
    # update beta (intercept only model)
    
    pro_beta1=rnorm(1,beta1,0.02)
    MHrate1=sum(-E1*exp(pro_beta1*x1+theta[u][1:n])+y1*(pro_beta1*x1+theta[u][1:n]))-
      sum(-E1*exp(beta1*x1+theta[u][1:n])+y1*(beta1*x1+theta[u][1:n]))  
    
    if(runif(1,0,1)<exp(MHrate1)){
      beta1<-pro_beta1
      acceptbeta1=acceptbeta1+1
    } 
    
    pro_beta2=rnorm(1,beta2,0.05)
    MHrate2=sum(-E2*exp(pro_beta2*x2+theta[u][(n+1):(2*n)])+y2*(pro_beta2*x2+theta[u][(n+1):(2*n)]))-
      sum(-E2*exp(beta2*x2+theta[u][(n+1):(2*n)])+y2*(beta2*x2+theta[u][(n+1):(2*n)]))  
    
    if(runif(1,0,1)<exp(MHrate2)){
      beta2<-pro_beta2
      acceptbeta2=acceptbeta2+1
    } 
    
    pro_beta3=rnorm(1,beta3,0.05)
    MHrate3=sum(-E3*exp(pro_beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3*x3+theta[u][(2*n+1):(3*n)]))-
      sum(-E3*exp(beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(beta3*x3+theta[u][(2*n+1):(3*n)]))  
    
    if(runif(1,0,1)<exp(MHrate3)){
      beta3<-pro_beta3
      acceptbeta3=acceptbeta3+1
    } 
    
    pro_beta4=rnorm(1,beta4,0.02)
    MHrate4=sum(-E4*exp(pro_beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4*x4+theta[u][(3*n+1):(4*n)]))-
      sum(-E4*exp(beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(beta4*x4+theta[u][(3*n+1):(4*n)]))  
    
    if(runif(1,0,1)<exp(MHrate4)){
      beta4<-pro_beta4
      acceptbeta4=acceptbeta4+1
    } 
    
    beta <- c(beta1, beta2, beta3, beta4)
    
    
    #########################
    ###   update theta    ###
    ######################### 
    
    u1 = u[1:n]
    u2 = u[(n+1):(2*n)]
    u3 = u[(2*n+1):(3*n)]
    u4 = u[(3*n+1):(4*n)]
    
    
    
    for (j in 1:n.atoms){
      
      count[j]=sum(u==j)
      
      if (count[j]==0){
        theta[j]=rnorm(1,0,sqrt(1/taus))
      }else{
        pro_theta=rnorm(1,theta[j],0.06)
        
        MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
          sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
        if(runif(1,0,1)<exp(MHrate)){
          theta[j]<-pro_theta
          accepttheta=accepttheta+1
        } 
      }
    }
    
    
    
    ######################
    ###   update r     ###
    ###################### 
    
    for (k in 1:nq){
      pro_r=r;pro_Fr=F_r;pro_u=u
      pro_r[k]=rnorm(1,r[k],1.7)
      pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))         
      pro_u[k]=makeu(pro_Fr[k],probs)
      
      MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
        dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T)
      if(runif(1,0,1)<exp(MH)){
        r[k]=pro_r[k]
        F_r[k]=pro_Fr[k]
        u[k]=pro_u[k]
        acceptr=acceptr+1
      }
    }
    
    
    ######################
    ###   update   v   ###
    ###################### 
    
    for (j in 1:n.atoms){

      pro_v=v
      pro_v[j]<-rnorm(1,v[j],0.06)
      while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.06)}
  
      pro_probs=makeprobs(pro_v)
      pro_u=makeu(F_r,pro_probs)
      
      MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
        log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
      
      
      if(runif(1,0,1)<exp(MH)){
        v[j]=pro_v[j]
        probs=pro_probs
        u=pro_u
        acceptv=acceptv+1   
      }
      # }
    }
    
    
    ######################
    ###   update taus  ###
    ###################### 
    
    
    taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
    
    
    ######################
    ###   update rho  ###
    ###################### 
    
    
    pro_eta1 = rnorm(1, eta[1], 1.1)
    pro_eta2 = rnorm(1, eta[2], 1.1)
    pro_eta3 = rnorm(1, eta[3], 1.1)
    pro_eta4 = rnorm(1, eta[4], 1.1)
    pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
    pro_rho = exp(pro_eta)/(1+exp(pro_eta))
    
    pro_invQ1 = solve(D - pro_rho[1] * Minc)
    pro_invQ2 = solve(D - pro_rho[2] * Minc)
    pro_invQ3 = solve(D - pro_rho[3] * Minc)
    pro_invQ4 = solve(D - pro_rho[4] * Minc)
    
    kprod = kronecker(A, diag(n))
    pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
    pro_Vr = kprod %*% pro_invQ %*% t(kprod)
    
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + 
      log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
      log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
      log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
    
    if(runif(1,0,1)<exp(MH)){
      eta = pro_eta
      rho = pro_rho
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptrho=acceptrho+1 
    }
    
    ######################
    ###   update A  ###
    ###################### 

    pro_A = matrix(0, q, q)
    for(i in 1:q){
      for(j in 1:i){
        if(j == i){
          pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.05))
        }else{
          pro_A[i, j] = rnorm(1, A[i, j], 0.05)
        }
      }
    }
    
    invQ1 = solve(D - rho[1] * Minc)
    invQ2 = solve(D - rho[2] * Minc)
    invQ3 = solve(D - rho[3] * Minc)
    invQ4 = solve(D - rho[4] * Minc)
    
    Sigma =  A %*% t(A)
    
    pro_kprod = kronecker(pro_A, diag(n))
    invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
    pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
    pro_Sigma =  pro_A %*% t(pro_A)
    
    lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
    pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
    
    if(runif(1,0,1)<exp(MH)){
      A = pro_A
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptA=acceptA+1 
    }
    
    
    ######################
    ### record samples ###
    ###################### 
    
    keeptheta[iter,] = theta
    keepphi[iter,] = theta[u]
    
    keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
    
    keepbeta[iter,]=beta
    keeptaus[iter]=taus
    keeprho1[iter]=rho[1]
    keeprho2[iter]=rho[2]
    keeprho3[iter]=rho[3]
    keeprho4[iter]=rho[4]
    keepu[iter,]=u
    keepv[iter,]=v
    keepr[iter,]=r
    keepFr[iter,]=F_r
    
    #   cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
    
  }
  
  list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
       v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
       rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}
set.seed(123)
mcmc_samples=ARDP_joint_diff_car(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]), 
                                 X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.2113737
## [1] 0.1699972
## [1] 0.29911
## [1] 0.1804282
## [1] 0.3057857
## [1] 0.2128738
## [1] 0.2809094
## [1] 0.3533784
## [1] 0.2302743
#Estimates of parameters
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
I. Obtain spatial randome effects in original spatial order
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
II. Difference boundaries for each cancer individually

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)

vij_samples1 <- t(apply(phis1, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples2 <- t(apply(phis2, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples3 <- t(apply(phis3, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples4 <- t(apply(phis4, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)

names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name

a. Top \(75\) pairs of neighboring counties with significant boundary effect

name_diff1 <- names(pvij1[order(pvij1,decreasing = T)][1:75])
name_diff2 <- names(pvij2[order(pvij2,decreasing = T)][1:75])
name_diff3 <- names(pvij3[order(pvij3,decreasing = T)][1:75])
name_diff4 <- names(pvij4[order(pvij4,decreasing = T)][1:75])

# Table for top 75 pairs of neighbors
name_diff = cbind(name_diff1, name_diff2, name_diff3, name_diff4)
colnames(name_diff) = c("Lung", "Esophageal", "Layrnx", "Coloretal")
knitr::kable(name_diff, caption = "Names of adjacent counties that have significant boundary effect (top 75) from the MDAGAR model for each cancer individually.")
Names of adjacent counties that have significant boundary effect (top 75) from the MDAGAR model for each cancer individually.
Lung Esophageal Layrnx Coloretal
alameda, contra costa alameda, contra costa alameda, contra costa alameda, contra costa
alameda, san joaquin alameda, san joaquin alameda, san joaquin alameda, san joaquin
amador, calaveras calaveras, san joaquin calaveras, stanislaus amador, san joaquin
amador, el dorado calaveras, stanislaus colusa, yolo calaveras, stanislaus
amador, sacramento colusa, yolo kern, san luis obispo colusa, yolo
amador, san joaquin kern, san luis obispo kings, san luis obispo contra costa, san joaquin
butte, colusa kings, san luis obispo madera, tuolumne fresno, inyo
calaveras, san joaquin lake, yolo mariposa, stanislaus inyo, kern
calaveras, stanislaus los angeles, orange monterey, san luis obispo inyo, mono
colusa, lake los angeles, ventura napa, yolo inyo, san bernardino
colusa, yolo madera, tuolumne sacramento, yolo inyo, tulare
contra costa, sacramento merced, tuolumne san francisco, san mateo kern, los angeles
contra costa, solano monterey, san luis obispo san joaquin, santa clara kern, san bernardino
el dorado, sacramento napa, yolo san joaquin, stanislaus kern, san luis obispo
fresno, inyo sacramento, yolo san luis obispo, santa barbara kern, ventura
fresno, kings san francisco, san mateo solano, yolo kings, san luis obispo
fresno, tulare san joaquin, santa clara stanislaus, tuolumne lake, yolo
glenn, lake san joaquin, stanislaus sutter, yolo madera, mariposa
imperial, riverside san luis obispo, santa barbara madera, mariposa madera, tuolumne
inyo, kern san mateo, santa clara mariposa, merced mariposa, merced
inyo, mono solano, yolo merced, tuolumne mariposa, stanislaus
inyo, san bernardino stanislaus, tuolumne san mateo, santa clara merced, tuolumne
inyo, tulare sutter, yolo santa clara, stanislaus monterey, san luis obispo
kern, san luis obispo madera, mariposa lake, yolo napa, yolo
kern, tulare amador, san joaquin inyo, san bernardino sacramento, yolo
kings, san luis obispo lake, mendocino calaveras, san joaquin san francisco, san mateo
lake, mendocino los angeles, san bernardino inyo, kern san joaquin, santa clara
lake, napa mariposa, stanislaus fresno, inyo san joaquin, stanislaus
lake, sonoma amador, sacramento lassen, shasta san luis obispo, santa barbara
lake, yolo lake, sonoma amador, san joaquin solano, yolo
lassen, plumas santa clara, stanislaus inyo, tulare stanislaus, tuolumne
lassen, shasta mariposa, merced lake, sonoma kern, santa barbara
los angeles, orange mendocino, trinity colusa, lake calaveras, san joaquin
los angeles, ventura lassen, shasta santa clara, santa cruz santa clara, stanislaus
madera, mariposa colusa, lake lassen, plumas amador, sacramento
madera, tuolumne el dorado, sacramento humboldt, siskiyou butte, sutter
mariposa, merced lake, napa inyo, mono mono, tuolumne
mariposa, stanislaus inyo, san bernardino humboldt, trinity monterey, santa cruz
mendocino, sonoma lassen, plumas amador, sacramento sutter, yolo
merced, tuolumne mendocino, tehama lake, napa san mateo, santa clara
mono, tuolumne inyo, tulare alameda, santa clara lassen, shasta
monterey, san luis obispo fresno, inyo mono, tuolumne butte, yuba
napa, yolo placer, sacramento lake, mendocino sacramento, san joaquin
orange, riverside inyo, kern mendocino, trinity alameda, santa clara
orange, san bernardino santa clara, santa cruz butte, colusa riverside, san diego
riverside, san bernardino nevada, placer butte, sutter orange, riverside
sacramento, yolo kern, ventura riverside, san bernardino riverside, san bernardino
san francisco, san mateo del norte, siskiyou mendocino, tehama monterey, san benito
san joaquin, santa clara inyo, mono nevada, placer colusa, lake
san joaquin, stanislaus glenn, lake el dorado, sacramento el dorado, sacramento
san luis obispo, santa barbara orange, riverside del norte, siskiyou plumas, yuba
san mateo, santa clara kern, santa barbara orange, riverside lassen, plumas
santa clara, stanislaus butte, colusa plumas, sierra imperial, riverside
solano, yolo butte, sutter san benito, santa clara humboldt, trinity
stanislaus, tuolumne riverside, san bernardino plumas, yuba mariposa, tuolumne
sutter, yolo humboldt, mendocino modoc, siskiyou merced, stanislaus
sacramento, san joaquin mono, tuolumne glenn, lake alpine, mono
kern, santa barbara nevada, sierra los angeles, ventura amador, el dorado
humboldt, trinity alpine, tuolumne alpine, calaveras nevada, yuba
mendocino, trinity plumas, sierra alpine, amador santa clara, santa cruz
butte, sutter del norte, humboldt glenn, tehama lake, napa
monterey, santa cruz merced, stanislaus alpine, tuolumne alpine, amador
merced, stanislaus modoc, siskiyou imperial, riverside colusa, glenn
kern, ventura glenn, tehama nevada, sierra butte, colusa
alameda, santa clara alpine, calaveras alpine, el dorado alpine, calaveras
imperial, san diego lassen, modoc el dorado, placer humboldt, siskiyou
butte, yuba nevada, yuba merced, stanislaus plumas, sierra
alameda, stanislaus alpine, el dorado alpine, mono fresno, san benito
placer, sacramento shasta, trinity lassen, modoc calaveras, tuolumne
alpine, amador alpine, amador modoc, shasta lake, mendocino
colusa, glenn alpine, mono merced, santa clara lake, sonoma
plumas, yuba humboldt, trinity contra costa, sacramento alpine, tuolumne
mendocino, tehama orange, san diego colusa, glenn alpine, el dorado
del norte, siskiyou kings, monterey kings, monterey modoc, shasta
lassen, modoc napa, solano napa, solano napa, solano

b. Estimated FDR curves

T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  th1 <- threshold1[i]
  est_diff1 <- as.numeric(pvij1 >= th1)
  FDR_est1[i] <- sum((1-pvij1) * est_diff1)  / sum(est_diff1)
  
  th2 <- threshold2[i]
  est_diff2 <- as.numeric(pvij2 >= th2)
  FDR_est2[i] <- sum((1-pvij2) * est_diff2)  / sum(est_diff2)
  
  th3 <- threshold3[i]
  est_diff3 <- as.numeric(pvij3 >= th3)
  FDR_est3[i] <- sum((1-pvij3) * est_diff3)  / sum(est_diff3)
  
  th4 <- threshold4[i]
  est_diff4 <- as.numeric(pvij4 >= th4)
  FDR_est4[i] <- sum((1-pvij4) * est_diff4)  / sum(est_diff4)
}

plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
       lty=1:4, cex=0.7)

c. Difference boundary detection

By setting FDR threshold the same as MDAGAR (\(0.025\)), we get difference boundaries detected for each cancer individually.

# Set threshold for FDR: 0.025
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)

# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])

est_diff2 <- as.numeric(pvij2 >= threshold2[T2])

est_diff3 <- as.numeric(pvij3 >= threshold3[T3])

est_diff4 <- as.numeric(pvij4 >= threshold4[T4])

# Lung cancer
edge_plot1 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}

for(i in which(est_diff1_1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}

# Esophageal cancer
edge_plot2 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff2 == 1)){
  edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}

# Larynx cancer
edge_plot3 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff3 == 1)){
  edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}

# Colorectal cancer
edge_plot4 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff4 == 1)){
  edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}

text <- "Figure 5. Difference boundaries (highlighted in red and blue) detected by MCAR in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))

ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5)) 

III. Model fitting: D score
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
  
  beta1 <- mcmc_samples$beta[i,1]
  beta2 <- mcmc_samples$beta[i,2]
  beta3 <- mcmc_samples$beta[i,3]
  beta4 <- mcmc_samples$beta[i,4]
  
  W1_mcmc = mcmc_samples$phi[i,1:58]
  W2_mcmc = mcmc_samples$phi[i,59:116]
  W3_mcmc = mcmc_samples$phi[i,117:174]
  W4_mcmc = mcmc_samples$phi[i,175:232]
  
  Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 +  W1_mcmc)) / E1
  Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 +  W2_mcmc)) / E2
  Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 +  W3_mcmc)) / E3
  Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 +  W4_mcmc)) / E4
  
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)

var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)

G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1

G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2

G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3

G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4

D_CAR = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_CAR
## [1]  2.314186 26.053475 43.448061  2.057750
sum(D_CAR)
## [1] 73.87347

You can also apply other analysis as MDAGAR here for more details.

3.3 Independent-disease Models implemented in rjags

3.3.1 DAGAR\(_{ind}\) Model
sink("MDAGAR_ARDP_ind.txt")
cat("
    model
    {
    
    for (i in 1:k)
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ1[i,i])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    for (i in (k+1):(2*k))
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ2[i-k,i-k])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    for (i in (2*k+1):(3*k))
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ3[i-2*k,i-2*k])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    for (i in (3*k+1):(4*k))
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ4[i-3*k,i-3*k])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    
    pi[1] <- V[1]
    prod[1] <- 1 - pi[1]
    for (h in 2:H){
    prod[h] <- prod[h-1] * (1-V[h])
    pi[h] <- V[h] * prod[h-1]
    }
    
    for (h in 1:H){
    theta[h] ~ dnorm(0,taus)
    V[h] ~ dbeta(1,alpha)
    sum_pi[h] <- sum(pi[1:h])
    }
    
   for(i in 1:2){
       Tau1[i] <- 1
       Tau2[i] <- 1
       Tau3[i] <- 1
       Tau4[i] <- 1
       r1[i] ~ dnorm(0, tau1*Tau1[i])
       r2[i] ~ dnorm(0, tau2*Tau2[i])
       r3[i] ~ dnorm(0, tau3*Tau3[i])
       r4[i] ~ dnorm(0, tau4*Tau4[i])
   }
   for (i in 3:k){
       Tau1[i] <- (1 + (ns[i-1] - 1) * rho1^2) / (1 - rho1^2)
       Tau2[i] <- (1 + (ns[i-1] - 1) * rho2^2) / (1 - rho2^2)
       Tau3[i] <- (1 + (ns[i-1] - 1) * rho3^2) / (1 - rho3^2)
       Tau4[i] <- (1 + (ns[i-1] - 1) * rho4^2) / (1 - rho4^2)

       b1[i] <- rho1 / (1 + (ns[i-1] - 1) * rho1^2)
       b2[i] <- rho2 / (1 + (ns[i-1] - 1) * rho2^2)
       b3[i] <- rho3 / (1 + (ns[i-1] - 1) * rho3^2)
       b4[i] <- rho4 / (1 + (ns[i-1] - 1) * rho4^2)
      
      for(j in 1:ns[i-1]){
         t1[i,j] <- r1[udnei[(cn[i-1] + j)]] * b1[i]
         t2[i,j] <- r2[udnei[(cn[i-1] + j)]] * b2[i]
         t3[i,j] <- r3[udnei[(cn[i-1] + j)]] * b3[i]
         t4[i,j] <- r4[udnei[(cn[i-1] + j)]] * b4[i]
      }
      for(j in (ns[i-1]+1):mns){
        t1[i,j] <- 0
        t2[i,j] <- 0
        t3[i,j] <- 0
        t4[i,j] <- 0
      }
      r1[i] ~ dnorm(sum(t1[i,]), tau1*Tau1[i])
      r2[i] ~ dnorm(sum(t2[i,]), tau2*Tau2[i])
      r3[i] ~ dnorm(sum(t3[i,]), tau3*Tau3[i])
      r4[i] ~ dnorm(sum(t4[i,]), tau4*Tau4[i])
   }
    
   for(i in 1:2){
      Taum1[i, i] <- 1
      Taum2[i, i] <- 1
      Taum3[i, i] <- 1
      Taum4[i, i] <- 1
    
      for(j in 1:k){
         B1[i,j] <- 0
         B2[i,j] <- 0
         B3[i,j] <- 0
         B4[i,j] <- 0
      }
    }
    for(l in 1:(k-1)){
       for(m in (l+1):k){
          Taum1[l,m] <- 0
          Taum2[l,m] <- 0
          Taum3[l,m] <- 0
          Taum4[l,m] <- 0
        }
    }

    Taum1[2,1] <- 0
    Taum2[2,1] <- 0
    Taum3[2,1] <- 0
    Taum4[2,1] <- 0
    for (i in 3:k) {
        Taum1[i,i] <- Tau1[i]
        Taum2[i,i] <- Tau2[i]
        Taum3[i,i] <- Tau3[i]
        Taum4[i,i] <- Tau4[i]
   
      for(m in 1:(i-1)){
        Taum1[i,m] <- 0
        Taum2[i,m] <- 0
        Taum3[i,m] <- 0
        Taum4[i,m] <- 0
       }

      for(j in (udnei[(cn[i-1] + 1):(cn[i-1] + ns[i-1])])){
        B1[i,j] <- b1[i]
        B2[i,j] <- b2[i]
        B3[i,j] <- b3[i]
        B4[i,j] <- b4[i]
      }
    
     for(j in index1[((k)*(i-3)-cn[i-1]+1) : ((k)*(i-3)-cn[i-1] + (k - ns[i-1]))]){
        B1[i,j] <- 0
        B2[i,j] <- 0
        B3[i,j] <- 0
        B4[i,j] <- 0
     }
  }
     
    Q1 <- tau1 * t(Ik - B1) %*% Taum1 %*% (Ik - B1)
    Q2 <- tau2 * t(Ik - B2) %*% Taum2 %*% (Ik - B2)
    Q3 <- tau3 * t(Ik - B3) %*% Taum3 %*% (Ik - B3)
    Q4 <- tau4 * t(Ik - B4) %*% Taum4 %*% (Ik - B4)
    r[1:k] <- r1[1:k]
    r[(k+1):(2*k)] <- r2[1:k]
    r[(2*k+1):(3*k)] <- r3[1:k]
    r[(3*k+1):(4*k)] <- r4[1:k]
    
    invQ1 <- inverse(Q1)
    invQ2 <- inverse(Q2)
    invQ3 <- inverse(Q3)
    invQ4 <- inverse(Q4)
    
    rho1 ~ dunif(0, 0.98)
    rho2 ~ dunif(0, 0.98)
    rho3 ~ dunif(0, 0.98)
    rho4 ~ dunif(0, 0.98)
    tau1 ~ dgamma(2, 0.1)
    tau2 ~ dgamma(2, 0.1)
    tau3 ~ dgamma(2, 0.1)
    tau4 ~ dgamma(2, 0.1)
    
    taus ~ dgamma(2,1)
    beta[1:ncolumn] ~ dmnorm(rep(0,ncolumn), (0.0001*I));
    }
    ", fill = TRUE)
sink()
model.data1 <- list(k = n,  I = diag(ncol(X)), X = X, Y = Y, E = E, Ik = diag(n), index1 = index1,
                    alpha = 1, ncolumn = ncol(X),  H = 15,  ns = dni, udnei = udnei, cn = c(0, cni))
model.inits <- rep(list(list(rho1 = 0.1, rho2 = 0.1, rho3 = 0.1, rho4 = 0.1, tau1 = 1, tau2 = 1, tau3 = 1, tau4 = 1,  
                             taus = 1, beta = rep(0, ncol(X)))),2)
model.param <- c("beta", "rho1", "rho2", "rho3", "rho4", "tau1", "tau2", "tau3", "tau4", 
                 "taus", "phi", "u")
set.seed(123)
mcmc_samples <- jags(model.data1, model.inits, model.param, "MDAGAR_ARDP_ind.txt",
                   n.chains = 2, n.iter = 25000,n.burnin = 20000, n.thin = 1)
# Parameter estimates
sample.mcmc = mcmc_samples$BUGSoutput$sims.matrix[,c(246, 238:245)]
I. Obtain spatial randome effects in original spatial order
phis <- mcmc_samples$BUGSoutput$sims.matrix[,6:237]
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
II. Difference boundaries for each cancer individually

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)

vij_samples1 <- t(apply(phis1, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples2 <- t(apply(phis2, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples3 <- t(apply(phis3, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples4 <- t(apply(phis4, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)

names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name

a. Estimated FDR curves

T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  th1 <- threshold1[i]
  est_diff1 <- as.numeric(pvij1 >= th1)
  FDR_est1[i] <- sum((1-pvij1) * est_diff1)  / sum(est_diff1)
  
  th2 <- threshold2[i]
  est_diff2 <- as.numeric(pvij2 >= th2)
  FDR_est2[i] <- sum((1-pvij2) * est_diff2)  / sum(est_diff2)
  
  th3 <- threshold3[i]
  est_diff3 <- as.numeric(pvij3 >= th3)
  FDR_est3[i] <- sum((1-pvij3) * est_diff3)  / sum(est_diff3)
  
  th4 <- threshold4[i]
  est_diff4 <- as.numeric(pvij4 >= th4)
  FDR_est4[i] <- sum((1-pvij4) * est_diff4)  / sum(est_diff4)
}

plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
       lty=1:4, cex=0.7)

b. Difference boundary detection

By setting FDR threshold as \(0.1\), we get difference boundaries detected for each cancer individually.

# Set threshold for FDR: 0.025
alpha_n = 0.1
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)

# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])

est_diff2 <- as.numeric(pvij2 >= threshold2[T2])

est_diff3 <- as.numeric(pvij3 >= threshold3[T3])

est_diff4 <- as.numeric(pvij4 >= threshold4[T4])

# Lung cancer
edge_plot1 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}

for(i in which(est_diff1_1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}

# Esophageal cancer
edge_plot2 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff2 == 1)){
  edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}

# Larynx cancer
edge_plot3 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff3 == 1)){
  edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}

# Colorectal cancer
edge_plot4 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff4 == 1)){
  edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}

text <- "Figure 6. Difference boundaries (highlighted in red and blue) detected by DAGAR_ind in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))

ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5)) 
III. Shared difference boundary for each pair of cancers

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}, \phi_{id'} \neq \phi_{jd'}),\quad i \sim j,\quad d \neq d'\)

vij_samplesc12 <- t(apply(cbind(phis1,phis2), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc13 <- t(apply(cbind(phis1,phis3), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc14 <- t(apply(cbind(phis1,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc23 <- t(apply(cbind(phis2,phis3), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc24 <- t(apply(cbind(phis2,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc34 <- t(apply(cbind(phis3,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

# probabilities: P(phi_id != phi_jd, phi_id' != phi_jd'), i ~ j, d != d'
pvijc12 <- apply(vij_samplesc12, 2, mean)
pvijc13 <- apply(vij_samplesc13, 2, mean)
pvijc14 <- apply(vij_samplesc14, 2, mean)
pvijc23 <- apply(vij_samplesc23, 2, mean)
pvijc24 <- apply(vij_samplesc24, 2, mean)
pvijc34 <- apply(vij_samplesc34, 2, mean)

# Estimated FDR
thresholdc12 = sort(pvijc12, decreasing = TRUE)[T_edge1]
thresholdc13 = sort(pvijc13, decreasing = TRUE)[T_edge1]
thresholdc14 = sort(pvijc14, decreasing = TRUE)[T_edge1]
thresholdc23 = sort(pvijc23, decreasing = TRUE)[T_edge1]
thresholdc24 = sort(pvijc24, decreasing = TRUE)[T_edge1]
thresholdc34 = sort(pvijc34, decreasing = TRUE)[T_edge1]

FDR_estc12 <- rep(0, length(T_edge1))
FDR_estc13 <- rep(0, length(T_edge1))
FDR_estc14 <- rep(0, length(T_edge1))
FDR_estc23 <- rep(0, length(T_edge1))
FDR_estc24 <- rep(0, length(T_edge1))
FDR_estc34 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  thc12 <- thresholdc12[i]
  est_diffc12 <- as.numeric(pvijc12 >= thc12)
  FDR_estc12[i] <- sum((1-pvijc12) * est_diffc12)  / sum(est_diffc12)
  
  thc13 <- thresholdc13[i]
  est_diffc13 <- as.numeric(pvijc13 >= thc13)
  FDR_estc13[i] <- sum((1-pvijc13) * est_diffc13)  / sum(est_diffc13)
  
  thc14 <- thresholdc14[i]
  est_diffc14 <- as.numeric(pvijc14 >= thc14)
  FDR_estc14[i] <- sum((1-pvijc14) * est_diffc14)  / sum(est_diffc14)
  
  thc23 <- thresholdc23[i]
  est_diffc23 <- as.numeric(pvijc23 >= thc23)
  FDR_estc23[i] <- sum((1-pvijc23) * est_diffc23)  / sum(est_diffc23)
  
  thc24 <- thresholdc24[i]
  est_diffc24 <- as.numeric(pvijc24 >= thc24)
  FDR_estc24[i] <- sum((1-pvijc24) * est_diffc24)  / sum(est_diffc24)
  
  thc34 <- thresholdc34[i]
  est_diffc34 <- as.numeric(pvijc34 >= thc34)
  FDR_estc34[i] <- sum((1-pvijc34) * est_diffc34)  / sum(est_diffc34)
}

Use the same threshold as individual cancer and identify shared boundaries

alpha_nc = 0.1
T12c = sum(FDR_estc12<=alpha_nc)
T13c = sum(FDR_estc13<=alpha_nc)
T14c = sum(FDR_estc14<=alpha_nc)
T23c = sum(FDR_estc23<=alpha_nc)
T24c = sum(FDR_estc24<=alpha_nc)
T34c = sum(FDR_estc34<=alpha_nc)


est_diffc12 <- as.numeric(pvijc12 >= thresholdc12[T12c])
neighbor_list_diffc12 <- neighbor_list0[est_diffc12 == 1, ]

est_diffc13 <- as.numeric(pvijc13 >= thresholdc13[T13c])
neighbor_list_diffc13 <- neighbor_list0[est_diffc13 == 1, ]

est_diffc14 <- as.numeric(pvijc14 >= thresholdc14[T14c])
neighbor_list_diffc14 <- neighbor_list0[est_diffc14 == 1, ]

est_diffc23 <- as.numeric(pvijc23 >= thresholdc23[T23c])
neighbor_list_diffc23 <- neighbor_list0[est_diffc23 == 1, ]

est_diffc24 <- as.numeric(pvijc24 >= thresholdc24[T24c])
neighbor_list_diffc24 <- neighbor_list0[est_diffc24 == 1, ]

est_diffc34 <- as.numeric(pvijc34 >= thresholdc34[T34c])
neighbor_list_diffc34 <- neighbor_list0[est_diffc34 == 1, ]

edge_plotc12 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc12==1)){
  edge_plotc12 = edge_plotc12 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Esophageal (T = ", T12c, ")", sep=""))
}


edge_plotc13 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc13==1)){
  edge_plotc13 = edge_plotc13 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Layrnx (T = ", T13c, ")", sep=""))
}


edge_plotc14 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc14==1)){
  edge_plotc14 = edge_plotc14 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Colorectal (T = ", T14c, ")", sep=""))
}

edge_plotc23 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc23==1)){
  edge_plotc23 = edge_plotc23 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal, Layrnx (T = ", T23c, ")", sep=""))
}


edge_plotc24 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc24==1)){
  edge_plotc24 = edge_plotc24 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal, Colorectal (T = ", T24c, ")", sep=""))
}


edge_plotc34 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc34==1)){
  edge_plotc34 = edge_plotc34 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx, Colorectal (T = ", T34c, ")", sep=""))
}

text <- "Figure 7. Shared difference boundaries (highlighted in red) detected by DAGAR_ind for each pair of cancer in SIR map. \n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 24)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,0,0,32, "cm"))
ggarrange(plot_0, NULL, NULL, edge_plotc12, edge_plotc13, edge_plotc14, 
          edge_plotc23, edge_plotc24, edge_plotc34, nrow = 3, ncol = 3, heights = c(1,5,5))
IV. Model fitting: D score
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
  
  beta1 <- mcmc_samples$beta[i,1]
  beta2 <- mcmc_samples$beta[i,2]
  beta3 <- mcmc_samples$beta[i,3]
  beta4 <- mcmc_samples$beta[i,4]
  
  W1_mcmc = mcmc_samples$phi[i,1:58]
  W2_mcmc = mcmc_samples$phi[i,59:116]
  W3_mcmc = mcmc_samples$phi[i,117:174]
  W4_mcmc = mcmc_samples$phi[i,175:232]
  
  Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 +  W1_mcmc)) / E1
  Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 +  W2_mcmc)) / E2
  Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 +  W3_mcmc)) / E3
  Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 +  W4_mcmc)) / E4
  
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)

var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)

G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1

G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2

G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3

G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4

D_DAGARind = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_DAGARind
sum(D_DAGARind)
3.3.2 CAR\(_{ind}\) Model
sink("MCAR_ARDP_ind.txt")
cat("
    model
    {
    
    for (i in 1:k)
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ1[i,i])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    for (i in (k+1):(2*k))
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ2[i-k,i-k])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    for (i in (2*k+1):(3*k))
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ3[i-2*k,i-2*k])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    for (i in (3*k+1):(4*k))
    {
    log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
    Y[i] ~ dpois(mu[i])
    phi[i] <- inprod(theta, u[i,])
    Fr[i] <- pnorm(r[i], 0, 1/invQ4[i-3*k,i-3*k])
    for(h in 2:H){
    u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
    }
    u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
    }
    
    
    pi[1] <- V[1]
    prod[1] <- 1 - pi[1]
    for (h in 2:(H-1)){
    prod[h] <- prod[h-1] * (1-V[h])
    pi[h] <- V[h] * prod[h-1]
    }
    pi[H] <- 1-sum(pi[1:(H-1)])
    
    for (h in 1:H){
    theta[h] ~ dnorm(0,taus)
    V[h] ~ dbeta(1,alpha)
    sum_pi[h] <- sum(pi[1:h])
    }
    
    Q1 <- tau1 * (D - rho1 * Minc)
    Q2 <- tau2 * (D - rho2 * Minc)
    Q3 <- tau3 * (D - rho3 * Minc)
    Q4 <- tau4 * (D - rho4 * Minc)
    
    r1[1:k] ~ dmnorm(rep(0, k), Q1)
    r2[1:k] ~ dmnorm(rep(0, k), Q2)
    r3[1:k] ~ dmnorm(rep(0, k), Q3)
    r4[1:k] ~ dmnorm(rep(0, k), Q4)
    r[1:k] <- r1[1:k]
    r[(k+1):(2*k)] <- r2[1:k]
    r[(2*k+1):(3*k)] <- r3[1:k]
    r[(3*k+1):(4*k)] <- r4[1:k]
    
    invQ1 <- inverse(Q1)
    invQ2 <- inverse(Q2)
    invQ3 <- inverse(Q3)
    invQ4 <- inverse(Q4)
    
    rho1 ~ dunif(0, 0.98)
    rho2 ~ dunif(0, 0.98)
    rho3 ~ dunif(0, 0.98)
    rho4 ~ dunif(0, 0.98)
    tau1 ~ dgamma(2, 0.1)
    tau2 ~ dgamma(2, 0.1)
    tau3 ~ dgamma(2, 0.1)
    tau4 ~ dgamma(2, 0.1)
    
    taus ~ dgamma(2,1)
    beta[1:ncolumn] ~ dmnorm(rep(0,ncolumn), (0.0001*I));
    }
    ", fill = TRUE)
sink()
model.data1 <- list(k = n,  I = diag(ncol(X)), X = X, Y = Y, E = E, 
                    alpha = 1, ncolumn = ncol(X),  H = 15,  D = D, Minc = Minc)
model.inits <- rep(list(list(rho1 = 0.1, rho2 = 0.1, rho3 = 0.1, rho4 = 0.1, tau1 = 1, tau2 = 1, tau3 = 1, tau4 = 1,  
                             taus = 1, beta = rep(0, ncol(X)))),2)
model.param <- c("beta", "rho1", "rho2", "rho3", "rho4", "tau1", "tau2", "tau3", "tau4", 
                 "taus", "phi", "u")
set.seed(123)
mcmc_samples <- jags(model.data1, model.inits, model.param, "MCAR_ARDP_ind.txt",
                 n.chains = 2, n.iter = 25000,n.burnin = 20000, n.thin = 1)
# Parameter estimates
sample.mcmc = mcmc_samples$BUGSoutput$sims.matrix[,c(246, 238:245)]
I. Obtain spatial randome effects in original spatial order
phis <- mcmc_samples$BUGSoutput$sims.matrix[,6:237]
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
II. Difference boundaries for each cancer individually

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)

vij_samples1 <- t(apply(phis1, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples2 <- t(apply(phis2, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples3 <- t(apply(phis3, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples4 <- t(apply(phis4, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)

names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name

a. Estimated FDR curves

T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  th1 <- threshold1[i]
  est_diff1 <- as.numeric(pvij1 >= th1)
  FDR_est1[i] <- sum((1-pvij1) * est_diff1)  / sum(est_diff1)
  
  th2 <- threshold2[i]
  est_diff2 <- as.numeric(pvij2 >= th2)
  FDR_est2[i] <- sum((1-pvij2) * est_diff2)  / sum(est_diff2)
  
  th3 <- threshold3[i]
  est_diff3 <- as.numeric(pvij3 >= th3)
  FDR_est3[i] <- sum((1-pvij3) * est_diff3)  / sum(est_diff3)
  
  th4 <- threshold4[i]
  est_diff4 <- as.numeric(pvij4 >= th4)
  FDR_est4[i] <- sum((1-pvij4) * est_diff4)  / sum(est_diff4)
}

plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
       lty=1:4, cex=0.7)

b. Difference boundary detection

By setting FDR threshold as \(0.1\), we get difference boundaries detected for each cancer individually.

# Set threshold for FDR: 0.025
alpha_n = 0.1
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)

# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])

est_diff2 <- as.numeric(pvij2 >= threshold2[T2])

est_diff3 <- as.numeric(pvij3 >= threshold3[T3])

est_diff4 <- as.numeric(pvij4 >= threshold4[T4])

# Lung cancer
edge_plot1 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}

for(i in which(est_diff1_1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}

# Esophageal cancer
edge_plot2 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff2 == 1)){
  edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}

# Larynx cancer
edge_plot3 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff3 == 1)){
  edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}

# Colorectal cancer
edge_plot4 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff4 == 1)){
  edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}

text <- "Figure 6. Difference boundaries (highlighted in red and blue) detected by DAGAR_ind in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))

ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5)) 
III. Shared difference boundary for each pair of cancers

We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}, \phi_{id'} \neq \phi_{jd'}),\quad i \sim j,\quad d \neq d'\)

vij_samplesc12 <- t(apply(cbind(phis1,phis2), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc13 <- t(apply(cbind(phis1,phis3), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc14 <- t(apply(cbind(phis1,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc23 <- t(apply(cbind(phis2,phis3), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc24 <- t(apply(cbind(phis2,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

vij_samplesc34 <- t(apply(cbind(phis3,phis4), 1, function(x){
  diff_bc1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]] & x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]+58]){
      diff_bc1 <- c(diff_bc1, 1)
    }else{
      diff_bc1 <- c(diff_bc1, 0)
    }
  }
  return(diff_bc1)
}))

# probabilities: P(phi_id != phi_jd, phi_id' != phi_jd'), i ~ j, d != d'
pvijc12 <- apply(vij_samplesc12, 2, mean)
pvijc13 <- apply(vij_samplesc13, 2, mean)
pvijc14 <- apply(vij_samplesc14, 2, mean)
pvijc23 <- apply(vij_samplesc23, 2, mean)
pvijc24 <- apply(vij_samplesc24, 2, mean)
pvijc34 <- apply(vij_samplesc34, 2, mean)

# Estimated FDR
thresholdc12 = sort(pvijc12, decreasing = TRUE)[T_edge1]
thresholdc13 = sort(pvijc13, decreasing = TRUE)[T_edge1]
thresholdc14 = sort(pvijc14, decreasing = TRUE)[T_edge1]
thresholdc23 = sort(pvijc23, decreasing = TRUE)[T_edge1]
thresholdc24 = sort(pvijc24, decreasing = TRUE)[T_edge1]
thresholdc34 = sort(pvijc34, decreasing = TRUE)[T_edge1]

FDR_estc12 <- rep(0, length(T_edge1))
FDR_estc13 <- rep(0, length(T_edge1))
FDR_estc14 <- rep(0, length(T_edge1))
FDR_estc23 <- rep(0, length(T_edge1))
FDR_estc24 <- rep(0, length(T_edge1))
FDR_estc34 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  thc12 <- thresholdc12[i]
  est_diffc12 <- as.numeric(pvijc12 >= thc12)
  FDR_estc12[i] <- sum((1-pvijc12) * est_diffc12)  / sum(est_diffc12)
  
  thc13 <- thresholdc13[i]
  est_diffc13 <- as.numeric(pvijc13 >= thc13)
  FDR_estc13[i] <- sum((1-pvijc13) * est_diffc13)  / sum(est_diffc13)
  
  thc14 <- thresholdc14[i]
  est_diffc14 <- as.numeric(pvijc14 >= thc14)
  FDR_estc14[i] <- sum((1-pvijc14) * est_diffc14)  / sum(est_diffc14)
  
  thc23 <- thresholdc23[i]
  est_diffc23 <- as.numeric(pvijc23 >= thc23)
  FDR_estc23[i] <- sum((1-pvijc23) * est_diffc23)  / sum(est_diffc23)
  
  thc24 <- thresholdc24[i]
  est_diffc24 <- as.numeric(pvijc24 >= thc24)
  FDR_estc24[i] <- sum((1-pvijc24) * est_diffc24)  / sum(est_diffc24)
  
  thc34 <- thresholdc34[i]
  est_diffc34 <- as.numeric(pvijc34 >= thc34)
  FDR_estc34[i] <- sum((1-pvijc34) * est_diffc34)  / sum(est_diffc34)
}

Use the same threshold as individual cancer and identify shared boundaries

alpha_nc = 0.1
T12c = sum(FDR_estc12<=alpha_nc)
T13c = sum(FDR_estc13<=alpha_nc)
T14c = sum(FDR_estc14<=alpha_nc)
T23c = sum(FDR_estc23<=alpha_nc)
T24c = sum(FDR_estc24<=alpha_nc)
T34c = sum(FDR_estc34<=alpha_nc)


est_diffc12 <- as.numeric(pvijc12 >= thresholdc12[T12c])
neighbor_list_diffc12 <- neighbor_list0[est_diffc12 == 1, ]

est_diffc13 <- as.numeric(pvijc13 >= thresholdc13[T13c])
neighbor_list_diffc13 <- neighbor_list0[est_diffc13 == 1, ]

est_diffc14 <- as.numeric(pvijc14 >= thresholdc14[T14c])
neighbor_list_diffc14 <- neighbor_list0[est_diffc14 == 1, ]

est_diffc23 <- as.numeric(pvijc23 >= thresholdc23[T23c])
neighbor_list_diffc23 <- neighbor_list0[est_diffc23 == 1, ]

est_diffc24 <- as.numeric(pvijc24 >= thresholdc24[T24c])
neighbor_list_diffc24 <- neighbor_list0[est_diffc24 == 1, ]

est_diffc34 <- as.numeric(pvijc34 >= thresholdc34[T34c])
neighbor_list_diffc34 <- neighbor_list0[est_diffc34 == 1, ]

edge_plotc12 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc12==1)){
  edge_plotc12 = edge_plotc12 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Esophageal (T = ", T12c, ")", sep=""))
}


edge_plotc13 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc13==1)){
  edge_plotc13 = edge_plotc13 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Layrnx (T = ", T13c, ")", sep=""))
}


edge_plotc14 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc14==1)){
  edge_plotc14 = edge_plotc14 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung, Colorectal (T = ", T14c, ")", sep=""))
}

edge_plotc23 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc23==1)){
  edge_plotc23 = edge_plotc23 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal, Layrnx (T = ", T23c, ")", sep=""))
}


edge_plotc24 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc24==1)){
  edge_plotc24 = edge_plotc24 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal, Colorectal (T = ", T24c, ")", sep=""))
}


edge_plotc34 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diffc34==1)){
  edge_plotc34 = edge_plotc34 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=14, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx, Colorectal (T = ", T34c, ")", sep=""))
}

text <- "Figure 7. Shared difference boundaries (highlighted in red) detected by DAGAR_ind for each pair of cancer in SIR map. \n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 24)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,0,0,32, "cm"))
ggarrange(plot_0, NULL, NULL, edge_plotc12, edge_plotc13, edge_plotc14, 
          edge_plotc23, edge_plotc24, edge_plotc34, nrow = 3, ncol = 3, heights = c(1,5,5))
IV. Model fitting: D score
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
  
  beta1 <- mcmc_samples$beta[i,1]
  beta2 <- mcmc_samples$beta[i,2]
  beta3 <- mcmc_samples$beta[i,3]
  beta4 <- mcmc_samples$beta[i,4]
  
  W1_mcmc = mcmc_samples$phi[i,1:58]
  W2_mcmc = mcmc_samples$phi[i,59:116]
  W3_mcmc = mcmc_samples$phi[i,117:174]
  W4_mcmc = mcmc_samples$phi[i,175:232]
  
  Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 +  W1_mcmc)) / E1
  Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 +  W2_mcmc)) / E2
  Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 +  W3_mcmc)) / E3
  Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 +  W4_mcmc)) / E4
  
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)

var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)

G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1

G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2

G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3

G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4

D_CARind = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_CARind
sum(D_CARind)

4. Difference boundary detection accounting for covariates using MDAGAR

Accounting for covariates would change difference boundaries detected for each cancer. Here we include two examples: (1) only smoking rates as covariate; (2) all covariates listed in Section 2.1

4.1 Only smoking rates as covariate

# Construct covariates matrix
X = as.matrix(bdiag(bdiag(X1[,1:2], X2[,1:2]), bdiag(X3[,1:2],X4[,1:2])))

ARDP_joint_diff_smoke<-function(y=Y, x1=as.matrix(X1[,1:2]), x2=as.matrix(X2[,1:2]), x3 = as.matrix(X3[,1:2]), x4 = as.matrix(X4[,1:2]), 
                          X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
  #y:       data
  #x:       covariates
  #n.atoms: number of atoms in the mixture dist.
  #theta:      the theta's (iid from baseline) in the model
  #alpha:     v~beta(1,alpha)
  #u:       the index indicator of spatial random effect 
  #rho:     sptatial autocorrelation parameter in DAGAR
  #Minc:       0-1 adjacency matrix 
  #V_r:     covariance matrix of joint MDAGAR
  #Q:       presicion matrix of DAGAR
  #r:       random effects following DAGAR
  #F_r:     Marginal CDF of r
  #taued:   presicion in Baseline of DP for disease d   
  #taus:    precision for theta
  
  
  
  
  nq<-length(y)
  n <- nq / q
  p<-ncol(X)
  y1 <- y[1:n]
  y2 <- y[(n+1):(2*n)]
  y3 <- y[(2*n+1):(3*n)]
  y4 <- y[(3*n+1):(4*n)]
  
  E1 <- E[1:n]
  E2 <- E[(n+1):(2*n)]
  E3 <- E[(2*n+1):(3*n)]
  E4 <- E[(3*n+1):(4*n)]
  
  sigmasq_beta = 10000
  keepbeta=matrix(0,runs,p)
  keepphi=matrix(0,runs,nq)
  keeptheta=matrix(0,runs,n.atoms)
  keepA=matrix(0,runs,10)
  keepu=matrix(0,runs,nq)
  keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
  keepv=matrix(0,runs,n.atoms)
  keepr=matrix(0,runs,nq)
  keepFr=matrix(0,runs,nq)
  
  
  #initial values
  
  theta=rep(0,n.atoms)
  
  beta1<-rep(0,ncol(x1))
  beta2<-rep(0,ncol(x2))
  beta3<-rep(0,ncol(x3))
  beta4<-rep(0,ncol(x4))
  beta = c(beta1, beta2, beta3, beta4)
  
  taus = 1
  
  c=2
  d=0.1
  d2=1
  v<-rep(.1,n.atoms)
  probs=makeprobs(v)
  
  #A matrix
  rho=c(0.5, 0.5, 0.5, 0.5)
  eta = log(rho/(1-rho))
  A = matrix(0, q, q)
  for(i in 1:q){
    for(j in 1:i){
      if(j == i){
        A[i, j] = exp(rnorm(1))
      }else{
        A[i, j] = rnorm(1)
      }
    }
  }
  
  Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q)
  invQ1 = solve(Q[[1]])
  invQ2 = solve(Q[[2]])
  invQ3 = solve(Q[[3]])
  invQ4 = solve(Q[[4]])
  
  kprod = kronecker(A, diag(n))
  invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
  
  Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
  r = rmvnorm(1, rep(0, nq), Vr)
  
  
  F_r=pnorm(r,0,sqrt(diag(Vr)))
  u=makeu(F_r,probs)
  phi <- theta[u]
  
  nu = 2
  R = 0.1 * diag(q)
  
  
  acceptr=acceptv=acceptrho=acceptA=0
  acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
  accepttheta = 0
  
  count<-afterburn<-0
  burn = burn + 1

  for(iter in 1:runs){
    
    if(iter == runs){
      print(iter)
      print(acceptA/(iter-1)) 
      print(acceptr/nq/(iter-1))
      print(acceptrho/(iter-1)) 
      print(acceptv/n.atoms/(iter-1))
      print(accepttheta/n.atoms/(iter-1))
      print(acceptbeta1/(iter-1))
      print(acceptbeta2/(iter-1))
      print(acceptbeta3/(iter-1))
      print(acceptbeta4/(iter-1))
    }
    ######################
    ###   update beta  ###
    ###################### 
    # update beta (intercept only model)
    
    pro_beta1=rmvnorm(1,beta1,0.000003*diag(ncol(x1)))
    MHrate1=sum(-E1*exp(pro_beta1%*%t(x1)+theta[u][1:n])+y1*(pro_beta1%*%t(x1)+theta[u][1:n]))-
      sum(-E1*exp(beta1%*%t(x1)+theta[u][1:n])+y1*(beta1%*%t(x1)+theta[u][1:n]))  
    
    if(runif(1,0,1)<exp(MHrate1)){
      beta1<-pro_beta1
      acceptbeta1=acceptbeta1+1
    } 
    
    pro_beta2=rmvnorm(1,beta2,0.00003*diag(ncol(x2)))
    MHrate2=sum(-E2*exp(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)]))-
      sum(-E2*exp(beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(beta2%*%t(x2)+theta[u][(n+1):(2*n)]))  
    
    if(runif(1,0,1)<exp(MHrate2)){
      beta2<-pro_beta2
      acceptbeta2=acceptbeta2+1
    } 
    
    pro_beta3=rmvnorm(1,beta3,0.00005*diag(ncol(x3)))
    MHrate3=sum(-E3*exp(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))-
      sum(-E3*exp(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))  
    
    if(runif(1,0,1)<exp(MHrate3)){
      beta3<-pro_beta3
      acceptbeta3=acceptbeta3+1
    } 
    
    pro_beta4=rmvnorm(1,beta4,0.000003*diag(ncol(x4)))
    MHrate4=sum(-E4*exp(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))-
      sum(-E4*exp(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))  
    
    if(runif(1,0,1)<exp(MHrate4)){
      beta4<-pro_beta4
      acceptbeta4=acceptbeta4+1
    } 
    
    beta <- c(beta1, beta2, beta3, beta4)
    
    
    #########################
    ###   update theta    ###
    ######################### 
    
    u1 = u[1:n]
    u2 = u[(n+1):(2*n)]
    u3 = u[(2*n+1):(3*n)]
    u4 = u[(3*n+1):(4*n)]
    
    
    
    for (j in 1:n.atoms){
      
      count[j]=sum(u==j)
      
      if (count[j]==0){
        theta[j]=rnorm(1,0,sqrt(1/taus))
      }else{
        pro_theta=rnorm(1,theta[j],0.06)
        
        MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
          sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
        if(runif(1,0,1)<exp(MHrate)){
          theta[j]<-pro_theta
          accepttheta=accepttheta+1
        } 
      }
    }
    
    
    
    ######################
    ###   update r     ###
    ###################### 
    
    for (k in 1:nq){
      
      pro_r=r;pro_Fr=F_r;pro_u=u
      pro_r[k]=rnorm(1,r[k],1.8)
      pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))         
      pro_u[k]=makeu(pro_Fr[k],probs)
      
      MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
        dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T) 
      if(runif(1,0,1)<exp(MH)){
        r[k]=pro_r[k]
        F_r[k]=pro_Fr[k]
        u[k]=pro_u[k]
        acceptr=acceptr+1
      }
    }
    
    
    ######################
    ###   update   v   ###
    ###################### 
    
    
    #pro_v=v
    for (j in 1:n.atoms){
      #0.9
      pro_v=v
      pro_v[j]<-rnorm(1,v[j],0.04)
      while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.04)}
      #if(pro_v[j]>0 & pro_v[j]<1){
      pro_probs=makeprobs(pro_v)
      pro_u=makeu(F_r,pro_probs)
      
      MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
        log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
      
      
      if(runif(1,0,1)<exp(MH)){
        v[j]=pro_v[j]
        probs=pro_probs
        u=pro_u
        acceptv=acceptv+1   
      }
      # }
    }
    
    
    ######################
    ###   update taus  ###
    ###################### 
    
    
    taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
    
    
    ######################
    ###   update rho  ###
    ###################### 
    
    
    pro_eta1 = rnorm(1, eta[1], 0.5)
    pro_eta2 = rnorm(1, eta[2], 0.5)
    pro_eta3 = rnorm(1, eta[3], 0.5)
    pro_eta4 = rnorm(1, eta[4], 0.5)
    pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
    pro_rho = exp(pro_eta)/(1+exp(pro_eta))
    pro_Q = Dinv_new(Rho = pro_rho, n, cn, ns, udnei, q)
    pro_invQ1 = solve(pro_Q[[1]])
    pro_invQ2 = solve(pro_Q[[2]])
    pro_invQ3 = solve(pro_Q[[3]])
    pro_invQ4 = solve(pro_Q[[4]])
    
    kprod = kronecker(A, diag(n))
    pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
    pro_Vr = kprod %*% pro_invQ %*% t(kprod)
    
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + 
      log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
      log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
      log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
    
    if(runif(1,0,1)<exp(MH)){
      eta = pro_eta
      rho = pro_rho
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptrho=acceptrho+1 
    }
    
    ######################
    ###   update A  ###
    ###################### 
    #0.06
    pro_A = matrix(0, q, q)
    for(i in 1:q){
      for(j in 1:i){
        if(j == i){
          pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.04))
          #pro_A[i, j] = rlnorm(1, log(A[i, j]), 0.1)
        }else{
          pro_A[i, j] = rnorm(1, A[i, j], 0.03)
        }
      }
    }
    
    Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q=4)
    invQ1 = solve(Q[[1]])
    invQ2 = solve(Q[[2]])
    invQ3 = solve(Q[[3]])
    invQ4 = solve(Q[[4]])
    Sigma =  A %*% t(A)
    
    pro_kprod = kronecker(pro_A, diag(n))
    invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
    pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
    pro_Sigma =  pro_A %*% t(pro_A)
    
    lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
    pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
    
    if(runif(1,0,1)<exp(MH)){
      A = pro_A
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptA=acceptA+1 
    }
    
    #kprod = kronecker(A, diag(n))
    #Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
    
    ######################
    ### record samples ###
    ###################### 
    
    keeptheta[iter,] = theta
    keepphi[iter,] = theta[u]
    
    keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
    
    keepbeta[iter,]=beta
    keeptaus[iter]=taus
    keeprho1[iter]=rho[1]
    keeprho2[iter]=rho[2]
    keeprho3[iter]=rho[3]
    keeprho4[iter]=rho[4]
    keepu[iter,]=u
    keepv[iter,]=v
    keepr[iter,]=r
    keepFr[iter,]=F_r
    
    #   cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
    
  }
  
  list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
       v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
       rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}
set.seed(123)
mcmc_samples=ARDP_joint_diff_smoke(y=Y, x1=as.matrix(X1[,1:2]), x2=as.matrix(X2[,1:2]), x3 = as.matrix(X3[,1:2]), x4 = as.matrix(X4[,1:2]), 
                             X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.2004067
## [1] 0.177686
## [1] 0.1337045
## [1] 0.1909753
## [1] 0.2943876
## [1] 0.190873
## [1] 0.2066069
## [1] 0.2041735
## [1] 0.2069069
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
4.1.1 Coefficients (covariates slopes) estimates
#Summary function
mysummary = function(invector) {
  c(mean(invector), quantile(invector, .025), quantile(invector,.975))
}
#Coefficients
cov_effect = round(t(apply(mcmc_samples$beta, 2, mysummary)),3)
beta_est = paste(cov_effect[,1], " (", cov_effect[,2], ", ", cov_effect[,3], ")", sep="")
beta_table = data.frame(t(matrix(beta_est, ncol = 2, byrow = TRUE)))
colnames(beta_table) = c("Lung", "Esophageal", "Larynx", "Colorectal")
rownames(beta_table) = c("Intercept", "Smoking")
kable(beta_table)
Lung Esophageal Larynx Colorectal
Intercept -0.066 (-0.109, -0.022) -0.101 (-0.194, 0.004) -0.127 (-0.337, 0.015) 0.063 (0.018, 0.105)
Smoking 0.02 (0.016, 0.025) 0.019 (0.008, 0.027) 0.019 (0.008, 0.034) -0.012 (-0.015, -0.009)
4.1.2 Difference boundaries for each cancer individually
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)

vij_samples1 <- t(apply(phis1, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))



vij_samples2 <- t(apply(phis2, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples3 <- t(apply(phis3, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))



vij_samples4 <- t(apply(phis4, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

#probabilities
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)

names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name

T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  th1 <- threshold1[i]
  est_diff1 <- as.numeric(pvij1 >= th1)
  FDR_est1[i] <- sum((1-pvij1) * est_diff1)  / sum(est_diff1)
  
  th2 <- threshold2[i]
  est_diff2 <- as.numeric(pvij2 >= th2)
  FDR_est2[i] <- sum((1-pvij2) * est_diff2)  / sum(est_diff2)
  
  th3 <- threshold3[i]
  est_diff3 <- as.numeric(pvij3 >= th3)
  FDR_est3[i] <- sum((1-pvij3) * est_diff3)  / sum(est_diff3)
  
  th4 <- threshold4[i]
  est_diff4 <- as.numeric(pvij4 >= th4)
  FDR_est4[i] <- sum((1-pvij4) * est_diff4)  / sum(est_diff4)
}

# Use the same threshold as MDAGAR and identify difference boundary for each cancer individually
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)

est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])

edge_plot1 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}

edge_plot2 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff2 == 1)){
  edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}


edge_plot3 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff3 == 1)){
  edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}


edge_plot4 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff4 == 1)){
  edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}

text <- "Figure 8. Difference boundaries (highlighted in red and blue) detected by MDAGAR after accounting for smoking rates.\n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 22)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,24, "cm"))

ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5)) 

4.2 all covariates listed in Section 2.1

# Construct covariates matrix
X = as.matrix(bdiag(bdiag(X1, X2), bdiag(X3,X4)))

ARDP_joint_diff<-function(y=Y, x1=as.matrix(X1), x2=as.matrix(X2), x3 = as.matrix(X3), x4 = as.matrix(X4), 
                          X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
  #y:       data
  #x:       covariates
  #n.atoms: number of atoms in the mixture dist.
  #theta:      the theta's (iid from baseline) in the model
  #alpha:     v~beta(1,alpha)
  #u:       the index indicator of spatial random effect 
  #rho:     sptatial autocorrelation parameter in DAGAR
  #Minc:       0-1 adjacency matrix 
  #V_r:     covariance matrix of joint MDAGAR
  #Q:       presicion matrix of DAGAR
  #r:       random effects following DAGAR
  #F_r:     Marginal CDF of r
  #taued:   presicion in Baseline of DP for disease d   
  #taus:    precision for theta
  
  
  
  
  nq<-length(y)
  n <- nq / q
  p<-ncol(X)
  y1 <- y[1:n]
  y2 <- y[(n+1):(2*n)]
  y3 <- y[(2*n+1):(3*n)]
  y4 <- y[(3*n+1):(4*n)]
  
  E1 <- E[1:n]
  E2 <- E[(n+1):(2*n)]
  E3 <- E[(2*n+1):(3*n)]
  E4 <- E[(3*n+1):(4*n)]
  
  sigmasq_beta = 10000
  keepbeta=matrix(0,runs,p)
  keepphi=matrix(0,runs,nq)
  keeptheta=matrix(0,runs,n.atoms)
  keepA=matrix(0,runs,10)
  keepu=matrix(0,runs,nq)
  keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
  keepv=matrix(0,runs,n.atoms)
  keepr=matrix(0,runs,nq)
  keepFr=matrix(0,runs,nq)
  
  
  #initial values
  
  theta=rep(0,n.atoms)
  
  beta1<-rep(0,ncol(x1))
  beta2<-rep(0,ncol(x2))
  beta3<-rep(0,ncol(x3))
  beta4<-rep(0,ncol(x4))
  beta = c(beta1, beta2, beta3, beta4)
  
  taus = 1
  
  c=2
  d=0.1
  d2=1
  v<-rep(.1,n.atoms)
  probs=makeprobs(v)
  
  #A matrix
  rho=c(0.5, 0.5, 0.5, 0.5)
  eta = log(rho/(1-rho))
  A = matrix(0, q, q)
  for(i in 1:q){
    for(j in 1:i){
      if(j == i){
        A[i, j] = exp(rnorm(1))
      }else{
        A[i, j] = rnorm(1)
      }
    }
  }
  
  Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q)
  invQ1 = solve(Q[[1]])
  invQ2 = solve(Q[[2]])
  invQ3 = solve(Q[[3]])
  invQ4 = solve(Q[[4]])
  
  kprod = kronecker(A, diag(n))
  invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
  
  Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
  r = rmvnorm(1, rep(0, nq), Vr)
  
  
  F_r=pnorm(r,0,sqrt(diag(Vr)))
  u=makeu(F_r,probs)
  phi <- theta[u]
  
  nu = 2
  R = 0.1 * diag(q)
  
  
  acceptr=acceptv=acceptrho=acceptA=0
  acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
  accepttheta = 0
  
  count<-afterburn<-0
  burn = burn + 1

  for(iter in 1:runs){
    
    if(iter == runs){
      print(iter)
      print(acceptA/(iter-1)) 
      print(acceptr/nq/(iter-1))
      print(acceptrho/(iter-1)) 
      print(acceptv/n.atoms/(iter-1))
      print(accepttheta/n.atoms/(iter-1))
      print(acceptbeta1/(iter-1))
      print(acceptbeta2/(iter-1))
      print(acceptbeta3/(iter-1))
      print(acceptbeta4/(iter-1))
    }
    ######################
    ###   update beta  ###
    ###################### 
    # update beta (intercept only model)
    
    pro_beta1=rmvnorm(1,beta1,0.0000001*diag(ncol(x1)))
    MHrate1=sum(-E1*exp(pro_beta1%*%t(x1)+theta[u][1:n])+y1*(pro_beta1%*%t(x1)+theta[u][1:n]))-
      sum(-E1*exp(beta1%*%t(x1)+theta[u][1:n])+y1*(beta1%*%t(x1)+theta[u][1:n]))  
    
    if(runif(1,0,1)<exp(MHrate1)){
      beta1<-pro_beta1
      acceptbeta1=acceptbeta1+1
    } 
    
    pro_beta2=rmvnorm(1,beta2,0.000002*diag(ncol(x2)))
    MHrate2=sum(-E2*exp(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)]))-
      sum(-E2*exp(beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(beta2%*%t(x2)+theta[u][(n+1):(2*n)]))  
    
    if(runif(1,0,1)<exp(MHrate2)){
      beta2<-pro_beta2
      acceptbeta2=acceptbeta2+1
    } 
    
    pro_beta3=rmvnorm(1,beta3,0.000004*diag(ncol(x3)))
    MHrate3=sum(-E3*exp(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))-
      sum(-E3*exp(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))  
    
    if(runif(1,0,1)<exp(MHrate3)){
      beta3<-pro_beta3
      acceptbeta3=acceptbeta3+1
    } 
    
    pro_beta4=rmvnorm(1,beta4,0.0000002*diag(ncol(x4)))
    MHrate4=sum(-E4*exp(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))-
      sum(-E4*exp(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))  
    
    if(runif(1,0,1)<exp(MHrate4)){
      beta4<-pro_beta4
      acceptbeta4=acceptbeta4+1
    } 
    
    beta <- c(beta1, beta2, beta3, beta4)
    
    
    #########################
    ###   update theta    ###
    ######################### 
    
    u1 = u[1:n]
    u2 = u[(n+1):(2*n)]
    u3 = u[(2*n+1):(3*n)]
    u4 = u[(3*n+1):(4*n)]
    
    for (j in 1:n.atoms){
      
      count[j]=sum(u==j)
      
      if (count[j]==0){
        theta[j]=rnorm(1,0,sqrt(1/taus))
      }else{
        pro_theta=rnorm(1,theta[j],0.06)
        
        MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
          sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
        if(runif(1,0,1)<exp(MHrate)){
          theta[j]<-pro_theta
          accepttheta=accepttheta+1
        } 
      }
    }
    
    
    
    ######################
    ###   update r     ###
    ###################### 
    
    for (k in 1:nq){
      
      pro_r=r;pro_Fr=F_r;pro_u=u
      pro_r[k]=rnorm(1,r[k],1.8)
      pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))         
      pro_u[k]=makeu(pro_Fr[k],probs)
      
      MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
        dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T)
      if(runif(1,0,1)<exp(MH)){
        r[k]=pro_r[k]
        F_r[k]=pro_Fr[k]
        u[k]=pro_u[k]
        acceptr=acceptr+1
      }
    }
    
    
    ######################
    ###   update   v   ###
    ###################### 
    
    
    #pro_v=v
    for (j in 1:n.atoms){
      pro_v=v
      pro_v[j]<-rnorm(1,v[j],0.05)
      while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.05)}
      pro_probs=makeprobs(pro_v)
      pro_u=makeu(F_r,pro_probs)
      
      MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
        log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
      
      
      if(runif(1,0,1)<exp(MH)){
        v[j]=pro_v[j]
        probs=pro_probs
        u=pro_u
        acceptv=acceptv+1   
      }
      # }
    }
    
    
    ######################
    ###   update taus  ###
    ###################### 
    
    
    taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
    
    
    ######################
    ###   update rho  ###
    ###################### 
    
    
    pro_eta1 = rnorm(1, eta[1], 0.5)
    pro_eta2 = rnorm(1, eta[2], 0.5)
    pro_eta3 = rnorm(1, eta[3], 0.5)
    pro_eta4 = rnorm(1, eta[4], 0.5)
    pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
    pro_rho = exp(pro_eta)/(1+exp(pro_eta))
    pro_Q = Dinv_new(Rho = pro_rho, n, cn, ns, udnei, q)
    pro_invQ1 = solve(pro_Q[[1]])
    pro_invQ2 = solve(pro_Q[[2]])
    pro_invQ3 = solve(pro_Q[[3]])
    pro_invQ4 = solve(pro_Q[[4]])
    
    kprod = kronecker(A, diag(n))
    pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
    pro_Vr = kprod %*% pro_invQ %*% t(kprod)
    
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + 
      log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
      log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
      log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
    
    if(runif(1,0,1)<exp(MH)){
      eta = pro_eta
      rho = pro_rho
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptrho=acceptrho+1 
    }
    
    ######################
    ###   update A  ###
    ###################### 
    #0.06
    pro_A = matrix(0, q, q)
    for(i in 1:q){
      for(j in 1:i){
        if(j == i){
          pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.03))
          #pro_A[i, j] = rlnorm(1, log(A[i, j]), 0.1)
        }else{
          pro_A[i, j] = rnorm(1, A[i, j], 0.03)
        }
      }
    }
    
    Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q=4)
    invQ1 = solve(Q[[1]])
    invQ2 = solve(Q[[2]])
    invQ3 = solve(Q[[3]])
    invQ4 = solve(Q[[4]])
    Sigma =  A %*% t(A)
    
    pro_kprod = kronecker(pro_A, diag(n))
    invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
    pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
    pro_Sigma =  pro_A %*% t(pro_A)
    
    lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
    pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
    MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA - 
      dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
    
    if(runif(1,0,1)<exp(MH)){
      A = pro_A
      Vr = as.matrix(forceSymmetric(pro_Vr))
      acceptA=acceptA+1 
    }
    
    ######################
    ### record samples ###
    ###################### 
    
    keeptheta[iter,] = theta
    keepphi[iter,] = theta[u]
    
    keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
    
    keepbeta[iter,]=beta
    keeptaus[iter]=taus
    keeprho1[iter]=rho[1]
    keeprho2[iter]=rho[2]
    keeprho3[iter]=rho[3]
    keeprho4[iter]=rho[4]
    keepu[iter,]=u
    keepv[iter,]=v
    keepr[iter,]=r
    keepFr[iter,]=F_r
    
    #   cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
    
  }
  
  list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
       v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
       rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}
set.seed(123)
mcmc_samples=ARDP_joint_diff(y=Y, x1=as.matrix(X1), x2=as.matrix(X2), x3 = as.matrix(X3), x4 = as.matrix(X4), 
                             X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.1171039
## [1] 0.1961909
## [1] 0.1452048
## [1] 0.3426181
## [1] 0.3084636
## [1] 0.2891763
## [1] 0.2085736
## [1] 0.1963399
## [1] 0.2082736
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
4.2.1 Coefficients (covariates slopes) estimates
#Coefficients
cov_effect = round(t(apply(mcmc_samples$beta, 2, mysummary)),3)
beta_est = paste(cov_effect[,1], " (", cov_effect[,2], ", ", cov_effect[,3], ")", sep="")
beta_table = data.frame(t(matrix(beta_est, ncol = 8, byrow = TRUE)))
colnames(beta_table) = c("Lung", "Esophageal", "Larynx", "Colorectal")
rownames(beta_table) = c("Intercept", "Smoking", "Young", "Old", "Edu", "Poverty",
                         "Unemp", "Black" )
kable(beta_table)
Lung Esophageal Larynx Colorectal
Intercept 0.021 (0.008, 0.032) 0.056 (0.028, 0.091) 0.002 (-0.043, 0.05) 0.016 (0.002, 0.036)
Smoking 0.015 (0.011, 0.02) 0.016 (0.003, 0.031) 0.029 (0.01, 0.053) -0.002 (-0.007, 0.003)
Young -0.015 (-0.019, -0.012) -0.009 (-0.019, 0.001) -0.02 (-0.039, -0.006) -0.005 (-0.01, -0.002)
Old 0.029 (0.026, 0.032) 0.027 (0.017, 0.038) 0.018 (0.004, 0.034) 0.024 (0.021, 0.028)
Edu -0.012 (-0.016, -0.009) -0.026 (-0.035, -0.017) -0.001 (-0.016, 0.012) -0.006 (-0.009, -0.003)
Poverty -0.008 (-0.012, -0.003) -0.009 (-0.021, 0.004) -0.012 (-0.032, 0.01) -0.001 (-0.006, 0.004)
Unemp 0.042 (0.033, 0.051) 0.057 (0.036, 0.078) 0.037 (0.014, 0.062) 0.014 (0.007, 0.02)
Black -0.01 (-0.012, -0.007) -0.017 (-0.026, -0.007) -0.009 (-0.021, 0.004) 0.005 (0.001, 0.009)
4.2.2 Difference boundaries for each cancer individually
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)

vij_samples1 <- t(apply(phis1, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))



vij_samples2 <- t(apply(phis2, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

vij_samples3 <- t(apply(phis3, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))



vij_samples4 <- t(apply(phis4, 1, function(x){
  diff_b1 <- NULL
  for(i in 1:nrow(neighbor_list0)){
    if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
      diff_b1 <- c(diff_b1, 1)
    }else{
      diff_b1 <- c(diff_b1, 0)
    }
  }
  return(diff_b1)
}))

#probabilities
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)

names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name

T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))


for(i in 1:length(threshold1)){
  th1 <- threshold1[i]
  est_diff1 <- as.numeric(pvij1 >= th1)
  FDR_est1[i] <- sum((1-pvij1) * est_diff1)  / sum(est_diff1)
  
  th2 <- threshold2[i]
  est_diff2 <- as.numeric(pvij2 >= th2)
  FDR_est2[i] <- sum((1-pvij2) * est_diff2)  / sum(est_diff2)
  
  th3 <- threshold3[i]
  est_diff3 <- as.numeric(pvij3 >= th3)
  FDR_est3[i] <- sum((1-pvij3) * est_diff3)  / sum(est_diff3)
  
  th4 <- threshold4[i]
  est_diff4 <- as.numeric(pvij4 >= th4)
  FDR_est4[i] <- sum((1-pvij4) * est_diff4)  / sum(est_diff4)
}

# Use the same threshold as MDAGAR and identify difference boundary for each cancer individually
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)

est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])

edge_plot1 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff1 == 1)){
  edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}

edge_plot2 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff2 == 1)){
  edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}


edge_plot3 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff3 == 1)){
  edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}


edge_plot4 <- ggplot() +
  geom_polygon(data = ca.poly.df,  aes(long, lat, group = group),
               color = "black",
               fill = "white"
  )
for(i in which(est_diff4 == 1)){
  edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(size=16, face="bold",hjust = 0.5)) + 
    ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}

text <- "Figure 9. Difference boundaries (highlighted in red and blue) detected by MDAGAR after accounting for all covariate.\n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 22)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,24, "cm"))

ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5))